diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -29,7 +29,10 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -40,6 +43,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -87,11 +91,15 @@ cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE")); -// Temporary dummy option for tests. static cl::opt EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden, cl::desc("Use the new MemorySSA-backed DSE.")); +static cl::opt + MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden, + cl::desc("The number of memory instructions to scan for " + "dead store elimination (default = 100)")); + //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// @@ -1354,22 +1362,479 @@ return MadeChange; } +namespace { +//============================================================================= +// MemorySSA backed dead store elimination. +// +// The code below implements dead store elimination using MemorySSA. It uses +// the following general approach: given a MemoryDef, walk upwards to find +// clobbering MemoryDefs that may be killed by the starting def. Then check +// that there are no uses that may read the location of the original MemoryDef +// in between both MemoryDefs. A bit more concretely: +// +// For all MemoryDefs StartDef: +// 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking upwards. +// 2. Check that there no reads between DomAccess and the StartDef by checking +// all uses starting at DomAccess and walking until we see StartDef. +// 3. For each found DomDef, check that: +// 1. There are no barrier instructions between DomDef and StartDef (like +// throws or stores with ordering constraints). +// 2. StartDef is executed whenever DomDef is executed. +// 3. StartDef completely overwrites DomDef. +// 4. Erase DomDef from the function and MemorySSA. + +// Returns true if \p M is an intrisnic that does not read or write memory. +bool isNoopIntrinsic(MemoryUseOrDef *M) { + if (const IntrinsicInst *II = dyn_cast(M->getMemoryInst())) { + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::assume: + return true; + case Intrinsic::dbg_addr: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: + case Intrinsic::dbg_value: + llvm_unreachable("Intrinsic should not be modeled in MemorySSA"); + default: + return false; + } + } + return false; +} + +// Check if we can ignore \p D for DSE. +bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { + Instruction *DI = D->getMemoryInst(); + // Calls that only access inaccessible memory cannot read or write any memory + // locations we consider for elimination. + if (auto CS = CallSite(DI)) + if (CS.onlyAccessesInaccessibleMemory()) + return true; + + // We can eliminate stores to locations not visible to the caller across + // throwing instructions. + if (DI->mayThrow() && !DefVisibleToCaller) + return true; + + // We can remove the dead stores, irrespective of the fence and its ordering + // (release/acquire/seq_cst). Fences only constraints the ordering of + // already visible stores, it does not make a store visible to other + // threads. So, skipping over a fence does not change a store from being + // dead. + if (isa(DI)) + return true; + + // Skip intrinsics that do not really read or modify memory. + if (isNoopIntrinsic(D)) + return true; + + return false; +} + +struct DSEState { + Function &F; + AliasAnalysis &AA; + MemorySSA &MSSA; + DominatorTree &DT; + PostDominatorTree &PDT; + const TargetLibraryInfo &TLI; + + // All MemoryDefs that potentially could kill other MemDefs. + SmallVector MemDefs; + // Any that should be skipped as they are already deleted + SmallPtrSet SkipStores; + // Keep track of all of the objects that are invisible to the caller until the + // function returns. + SmallPtrSet InvisibleToCaller; + // Keep track of blocks with throwing instructions not modeled in MemorySSA. + SmallPtrSet ThrowingBlocks; + + DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, + PostDominatorTree &PDT, const TargetLibraryInfo &TLI) + : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {} + + static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, + DominatorTree &DT, PostDominatorTree &PDT, + const TargetLibraryInfo &TLI) { + DSEState State(F, AA, MSSA, DT, PDT, TLI); + // Collect blocks with throwing instructions not modeled in MemroySSA and + // alloc-like objects. + for (Instruction &I : instructions(F)) { + if (I.mayThrow() && !MSSA.getMemoryAccess(&I)) + State.ThrowingBlocks.insert(I.getParent()); + + auto *MD = dyn_cast_or_null(MSSA.getMemoryAccess(&I)); + if (MD && hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) + State.MemDefs.push_back(MD); + + // Track alloca and alloca-like objects. Here we care about objects not + // visible to the caller during function execution. Alloca objects are + // invalid in the caller, for alloca-like objects we ensure that they are + // not captured throughout the function. + if (isa(&I) || + (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true))) + State.InvisibleToCaller.insert(&I); + } + // Treat byval or inalloca arguments the same as Allocas, stores to them are + // dead at the end of the function. + for (Argument &AI : F.args()) + if (AI.hasByValOrInAllocaAttr()) + State.InvisibleToCaller.insert(&AI); + return State; + } + + Optional getLocForWriteEx(Instruction *I) const { + if (!I->mayWriteToMemory()) + return None; + + if (auto *MTI = dyn_cast(I)) + return {MemoryLocation::getForDest(MTI)}; + + if (auto CS = CallSite(I)) { + if (Function *F = CS.getCalledFunction()) { + StringRef FnName = F->getName(); + if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat)) + return {MemoryLocation(CS.getArgument(0))}; + } + return None; + } + + return MemoryLocation::getOrNone(I); + } + + // Returns true if \p Use may write to \p DefLoc. + bool isWriteClobber(MemoryLocation DefLoc, Instruction *UseInst) const { + if (!UseInst->mayWriteToMemory()) + return false; + + if (auto CS = CallSite(UseInst)) + if (CS.onlyAccessesInaccessibleMemory()) + return false; + + ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); + // If necessary, perform additional analysis. + if (isModSet(MR)) + MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); + MR = clearMust(MR); + return isModSet(MR); + } + + // Returns true if \p Use may read from \p DefLoc. + bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const { + if (!UseInst->mayReadFromMemory()) + return false; + + if (auto CS = CallSite(UseInst)) + if (CS.onlyAccessesInaccessibleMemory()) + return false; + + ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); + // If necessary, perform additional analysis. + if (isRefSet(MR)) + MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); + return isRefSet(MR); + } + + // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no read + // access in between or return None otherwise. The returned value may not (completely) + // overwrite \p DefLoc. Currently we bail out when we encounter any of the + // following + // * An aliasing MemoryUse (read). + // * A MemoryPHI + Optional getDomMemoryDef(MemoryAccess *Current, MemoryLocation DefLoc, + bool DefVisibleToCaller, int &ScanLimit) const { + MemoryDef *StartDef = cast(Current); + MemoryAccess *DomAccess; + MemoryDef *DomDef; + bool StepAgain; + LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current + << "\n"); + // Find the next clobbering Mod access for DefLoc, starting at Current. + do { + StepAgain = false; + // Reached TOP. + if (MSSA.isLiveOnEntryDef(Current)) + return None; + + MemoryUseOrDef *CurrentUD = dyn_cast(Current); + if (!CurrentUD) + return None; + + // Look for access that clobber DefLoc. + DomAccess = MSSA.getSkipSelfWalker()->getClobberingMemoryAccess( + CurrentUD->getDefiningAccess(), DefLoc); + DomDef = dyn_cast(DomAccess); + if (!DomDef || MSSA.isLiveOnEntryDef(DomDef)) + return None; + + // Check if we can skip DomDef for DSE. + if (canSkipDef(DomDef, DefVisibleToCaller)) { + StepAgain = true; + Current = DomDef; + } + } while (StepAgain); + + LLVM_DEBUG(dbgs() << " Checking for reads of " << *DomAccess); + if (isa(DomAccess)) + LLVM_DEBUG(dbgs() << " (" << *cast(DomAccess)->getMemoryInst() + << ")"); + LLVM_DEBUG(dbgs() << "\n"); + + SmallVector WorkList; + auto PushMemUses = [&WorkList](MemoryAccess *Acc) { + for (Use &U : Acc->uses()) + WorkList.push_back(cast(U.getUser())); + }; + PushMemUses(DomAccess); + + // Check if DomAccess may be read. + for (unsigned I = 0; I < WorkList.size(); I++) { + MemoryAccess *UseAccess = WorkList[I]; + + LLVM_DEBUG(dbgs() << " Checking use " << *UseAccess); + if (--ScanLimit == 0) { + LLVM_DEBUG(dbgs() << " ... hit scan limit\n"); + return None; + } + + // Bail out on MemoryPhis for now. + if (isa(UseAccess)) { + LLVM_DEBUG(dbgs() << " ... hit MemoryPhi\n"); + return None; + } + + Instruction *UseInst = cast(UseAccess)->getMemoryInst(); + LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n"); + + if (isNoopIntrinsic(cast(UseAccess))) { + PushMemUses(UseAccess); + continue; + } + + // Uses which may read the original MemoryDef mean we cannot eliminate the + // original MD. Stop walk. + if (isReadClobber(DefLoc, UseInst)) { + LLVM_DEBUG(dbgs() << " ... found read clobber\n"); + return None; + } + + if (StartDef == UseAccess) + continue; + + // Check all uses for MemoryDefs. We have to check uses of *all* + // MemoryDefs we discover, including non-aliasing ones. Otherwise we + // might miss cases like the following + // 1 = Def(LoE) ; <----- DomAccess stores [0,1] + // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] + // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. + // (The Use points to the *first* Def it may alias) + // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, stores [0,1] + if (MemoryDef *UseDef = dyn_cast(UseAccess)) { + PushMemUses(UseDef); + if (isWriteClobber(DefLoc, UseInst)) { + LLVM_DEBUG(dbgs() << " ... may-write clobber\n"); + return None; + } + } + } + + // No aliasing MemoryUses of DomAccess found, DomAccess is potentially dead. + return {DomAccess}; + } + + // Delete dead memory defs + void deleteDeadInstruction(Instruction *SI) { + MemorySSAUpdater Updater(&MSSA); + SmallVector NowDeadInsts; + NowDeadInsts.push_back(SI); + --NumFastOther; + + while (!NowDeadInsts.empty()) { + Instruction *DeadInst = NowDeadInsts.pop_back_val(); + ++NumFastOther; + + // Remove the Instruction from MSSA. + if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { + Updater.removeMemoryAccess(MA); + if (MemoryDef *MD = dyn_cast(MA)) { + SkipStores.insert(MD); + } + } + + // Remove it's operands + for (Use &O : DeadInst->operands()) + if (Instruction *OpI = dyn_cast(O)) { + O = nullptr; + if (isInstructionTriviallyDead(OpI, &TLI)) + NowDeadInsts.push_back(OpI); + } + + DeadInst->eraseFromParent(); + } + } + + // Check for any extra throws between SI and NI that block DSE. This only + // checks extra maythrows (those that arn't MemoryDef's). MemoryDef that may + // throw are handled during the walk from one def to the next. + bool mayThrowBetween(Instruction *SI, Instruction *NI, + const Value *SILocUnd) const { + // First see if we can ignore it by using the fact that SI is an + // alloca/alloca like object that is not visible to the caller during + // execution of the function. + if (SILocUnd && InvisibleToCaller.count(SILocUnd)) + return false; + + if (SI->getParent() == NI->getParent()) + return ThrowingBlocks.find(SI->getParent()) != ThrowingBlocks.end(); + return !ThrowingBlocks.empty(); + } + + // Check if \p NI acts as a DSE barrier for \p SI. The following instructions + // act as barriers: + // * A memory instruction that may throw and \p SI accesses a non-stack + // object. + // * Atomic stores stronger that monotonic. + bool isDSEBarrier(Instruction *SI, MemoryLocation &SILoc, + const Value *SILocUnd, Instruction *NI, + MemoryLocation &NILoc) const { + // If NI may throw it acts as a barrier, unless we are to an alloca/alloca + // like object that does not escape. + if (NI->mayThrow() && !InvisibleToCaller.count(SILocUnd)) + return true; + + if (NI->isAtomic()) { + if (auto *NSI = dyn_cast(NI)) { + if (isStrongerThanMonotonic(NSI->getOrdering())) + return true; + } else + llvm_unreachable( + "Other instructions should be modeled/skipped in MemorySSA"); + } + + return false; + } +}; + +bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, + MemorySSA &MSSA, DominatorTree &DT, + PostDominatorTree &PDT, + const TargetLibraryInfo &TLI) { + const DataLayout &DL = F.getParent()->getDataLayout(); + bool MadeChange = false; + + DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI); + // For each store: + for (unsigned I = 0; I < State.MemDefs.size(); I++) { + MemoryDef *Current = State.MemDefs[I]; + if (State.SkipStores.count(Current)) + continue; + Instruction *SI = cast(Current)->getMemoryInst(); + MemoryLocation SILoc = *State.getLocForWriteEx(SI); + assert(SILoc.Ptr && "SILoc should not be null"); + const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL); + Instruction *DefObj = + const_cast(dyn_cast(SILocUnd)); + bool DefVisibleToCaller = !State.InvisibleToCaller.count(SILocUnd); + if (DefObj && ((isAllocLikeFn(DefObj, &TLI) && + !PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT)))) + DefVisibleToCaller = false; + + LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " << *SI + << "\n"); + + int ScanLimit = MemorySSAScanLimit; + Optional Next; + // Walk MemorySSA upward to find MemoryDefs that might be killed by SI. + while ((Next = State.getDomMemoryDef(Current, SILoc, DefVisibleToCaller, + ScanLimit))) { + MemoryAccess *DomAccess = *Next; + LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess << "\n"); + MemoryDef *NextDef = dyn_cast(DomAccess); + Instruction *NI = NextDef->getMemoryInst(); + LLVM_DEBUG(dbgs() << " def " << *NI << "\n"); + + if (!hasAnalyzableMemoryWrite(NI, TLI)) + break; + MemoryLocation NILoc = *State.getLocForWriteEx(NI); + // Check for anything that looks like it will be a barrier to further + // removal + if (State.isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc)) { + LLVM_DEBUG(dbgs() << " stop, barrier\n"); + break; + } + + // We must post-dom the instructions to safely remove it. Continue checking from NextDef + if (!PDT.dominates(SI->getParent(), NextDef->getBlock())) { + Current = NextDef; + LLVM_DEBUG(dbgs() << " continue, not post-dominating!\n"); + continue; + } + + // Before we try to remove anything, check for any extra throwing + // instructions that block us from DSEing + if (State.mayThrowBetween(SI, NI, SILocUnd)) { + LLVM_DEBUG(dbgs() << " stop, may throw!\n"); + break; + } + + // Check if NI overwrites SI. + int64_t InstWriteOffset, DepWriteOffset; + InstOverlapIntervalsTy IOL; + OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, + InstWriteOffset, NI, IOL, AA, &F); + + if (OR == OW_Complete) { + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI + << "\n KILLER: " << *SI << '\n'); + State.deleteDeadInstruction(NI); + ++NumFastStores; + MadeChange = true; + } else + Current = NextDef; + } + } + + return MadeChange; +} +} // end anonymous namespace + //===----------------------------------------------------------------------===// // DSE Pass //===----------------------------------------------------------------------===// PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { - AliasAnalysis *AA = &AM.getResult(F); - DominatorTree *DT = &AM.getResult(F); - MemoryDependenceResults *MD = &AM.getResult(F); - const TargetLibraryInfo *TLI = &AM.getResult(F); + AliasAnalysis &AA = AM.getResult(F); + const TargetLibraryInfo &TLI = AM.getResult(F); + DominatorTree &DT = AM.getResult(F); + + if (EnableMemorySSA) { + MemorySSA &MSSA = AM.getResult(F).getMSSA(); + PostDominatorTree &PDT = AM.getResult(F); - if (!eliminateDeadStores(F, AA, MD, DT, TLI)) - return PreservedAnalyses::all(); + if (!eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI)) + return PreservedAnalyses::all(); + } else { + MemoryDependenceResults &MD = AM.getResult(F); + + if (!eliminateDeadStores(F, &AA, &MD, &DT, &TLI)) + return PreservedAnalyses::all(); + } PreservedAnalyses PA; PA.preserveSet(); PA.preserve(); - PA.preserve(); + if (EnableMemorySSA) + PA.preserve(); + else + PA.preserve(); return PA; } @@ -1388,25 +1853,42 @@ if (skipFunction(F)) return false; - DominatorTree *DT = &getAnalysis().getDomTree(); - AliasAnalysis *AA = &getAnalysis().getAAResults(); - MemoryDependenceResults *MD = - &getAnalysis().getMemDep(); - const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(F); + AliasAnalysis &AA = getAnalysis().getAAResults(); + DominatorTree &DT = getAnalysis().getDomTree(); + const TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); - return eliminateDeadStores(F, AA, MD, DT, TLI); + if (EnableMemorySSA) { + MemorySSA &MSSA = getAnalysis().getMSSA(); + PostDominatorTree &PDT = + getAnalysis().getPostDomTree(); + + return eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); + } else { + MemoryDependenceResults &MD = + getAnalysis().getMemDep(); + + return eliminateDeadStores(F, &AA, &MD, &DT, &TLI); + } } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + + if (EnableMemorySSA) { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } else { + AU.addRequired(); + AU.addPreserved(); + } } }; @@ -1417,8 +1899,10 @@ INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/PartialStore.ll @@ -52,36 +52,3 @@ store double 0.0, double* %Q ret void } - -; PR8657 -declare void @test5a(i32*) -define void @test5(i32 %i) nounwind ssp { - %A = alloca i32 - %B = bitcast i32* %A to i8* - %C = getelementptr i8, i8* %B, i32 %i - store i8 10, i8* %C ;; Dead store to variable index. - store i32 20, i32* %A - - call void @test5a(i32* %A) - ret void -; CHECK-LABEL: @test5( -; CHECK-NEXT: alloca -; CHECK-NEXT: store i32 20 -; CHECK-NEXT: call void @test5a -} - -declare void @test5a_as1(i32*) -define void @test5_addrspacecast(i32 %i) nounwind ssp { - %A = alloca i32 - %B = addrspacecast i32* %A to i8 addrspace(1)* - %C = getelementptr i8, i8 addrspace(1)* %B, i32 %i - store i8 10, i8 addrspace(1)* %C ;; Dead store to variable index. - store i32 20, i32* %A - - call void @test5a(i32* %A) - ret void -; CHECK-LABEL: @test5_addrspacecast( -; CHECK-NEXT: alloca -; CHECK-NEXT: store i32 20 -; CHECK-NEXT: call void @test5a -} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll copy from llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll copy to llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll @@ -1,51 +1,6 @@ -; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s - -; We conservative choose to prevent dead store elimination -; across release or stronger fences. It's not required -; (since the must still be a race on %addd.i), but -; it is conservatively correct. A legal optimization -; could hoist the second store above the fence, and then -; DSE one of them. -define void @test1(i32* %addr.i) { -; CHECK-LABEL: @test1 -; CHECK: store i32 5 -; CHECK: fence -; CHECK: store i32 5 -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence release - store i32 5, i32* %addr.i, align 4 - ret void -} - -; Same as previous, but with different values. If we ever optimize -; this more aggressively, this allows us to check that the correct -; store is retained (the 'i32 1' store in this case) -define void @test1b(i32* %addr.i) { -; CHECK-LABEL: @test1b -; CHECK: store i32 42 -; CHECK: fence release -; CHECK: store i32 1 -; CHECK: ret - store i32 42, i32* %addr.i, align 4 - fence release - store i32 1, i32* %addr.i, align 4 - ret void -} +; XFAIL: * -; We *could* DSE across this fence, but don't. No other thread can -; observe the order of the acquire fence and the store. -define void @test2(i32* %addr.i) { -; CHECK-LABEL: @test2 -; CHECK: store -; CHECK: fence -; CHECK: store -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence acquire - store i32 5, i32* %addr.i, align 4 - ret void -} +; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s ; We DSE stack alloc'ed and byval locations, in the presence of fences. ; Fence does not make an otherwise thread local store visible. @@ -93,4 +48,3 @@ store i32 4, i32* %P1, align 4 ret void } - diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll @@ -46,51 +46,3 @@ store i32 5, i32* %addr.i, align 4 ret void } - -; We DSE stack alloc'ed and byval locations, in the presence of fences. -; Fence does not make an otherwise thread local store visible. -; Right now the DSE in presence of fence is only done in end blocks (with no successors), -; but the same logic applies to other basic blocks as well. -; The store to %addr.i can be removed since it is a byval attribute -define void @test3(i32* byval %addr.i) { -; CHECK-LABEL: @test3 -; CHECK-NOT: store -; CHECK: fence -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence release - ret void -} - -declare void @foo(i8* nocapture %p) - -declare noalias i8* @malloc(i32) - -; DSE of stores in locations allocated through library calls. -define void @test_nocapture() { -; CHECK-LABEL: @test_nocapture -; CHECK: malloc -; CHECK: foo -; CHECK-NOT: store -; CHECK: fence - %m = call i8* @malloc(i32 24) - call void @foo(i8* %m) - store i8 4, i8* %m - fence release - ret void -} - - -; This is a full fence, but it does not make a thread local store visible. -; We can DSE the store in presence of the fence. -define void @fence_seq_cst() { -; CHECK-LABEL: @fence_seq_cst -; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: ret void - %P1 = alloca i32 - store i32 0, i32* %P1, align 4 - fence seq_cst - store i32 4, i32* %P1, align 4 - ret void -} - diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck --check-prefix=NO-LIMIT %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=3 -S | FileCheck --check-prefix=LIMIT-3 %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=4 -S | FileCheck --check-prefix=LIMIT-4 %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + + +define void @test2(i32* noalias %P, i32* noalias %Q, i32* noalias %R) { +; +; NO-LIMIT-LABEL: @test2( +; NO-LIMIT-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; NO-LIMIT: bb1: +; NO-LIMIT-NEXT: br label [[BB3:%.*]] +; NO-LIMIT: bb2: +; NO-LIMIT-NEXT: br label [[BB3]] +; NO-LIMIT: bb3: +; NO-LIMIT-NEXT: store i32 0, i32* [[Q:%.*]] +; NO-LIMIT-NEXT: store i32 0, i32* [[R:%.*]] +; NO-LIMIT-NEXT: store i32 0, i32* [[P:%.*]] +; NO-LIMIT-NEXT: ret void +; +; LIMIT-0-LABEL: @test2( +; LIMIT-0-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-0: bb1: +; LIMIT-0-NEXT: br label [[BB3:%.*]] +; LIMIT-0: bb2: +; LIMIT-0-NEXT: br label [[BB3]] +; LIMIT-0: bb3: +; LIMIT-0-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-0-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-0-NEXT: store i32 0, i32* [[P:%.*]] +; LIMIT-0-NEXT: ret void +; +; LIMIT-3-LABEL: @test2( +; LIMIT-3-NEXT: store i32 1, i32* [[P:%.*]] +; LIMIT-3-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-3: bb1: +; LIMIT-3-NEXT: br label [[BB3:%.*]] +; LIMIT-3: bb2: +; LIMIT-3-NEXT: br label [[BB3]] +; LIMIT-3: bb3: +; LIMIT-3-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-3-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-3-NEXT: store i32 0, i32* [[P]] +; LIMIT-3-NEXT: ret void +; +; LIMIT-4-LABEL: @test2( +; LIMIT-4-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-4: bb1: +; LIMIT-4-NEXT: br label [[BB3:%.*]] +; LIMIT-4: bb2: +; LIMIT-4-NEXT: br label [[BB3]] +; LIMIT-4: bb3: +; LIMIT-4-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-4-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-4-NEXT: store i32 0, i32* [[P:%.*]] +; LIMIT-4-NEXT: ret void +; + store i32 1, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + store i32 0, i32* %Q + store i32 0, i32* %R + store i32 0, i32* %P + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll @@ -59,8 +59,7 @@ ; Should not delete the volatile memset. define void @test17v(i8* %P, i8* %Q) nounwind ssp { ; CHECK-LABEL: @test17v( -; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 42, i64 8, i1 true) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true) @@ -74,8 +73,7 @@ ; Previously this was not allowed (PR8728), also discussed in PR11763. define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { ; CHECK-LABEL: @test18( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) @@ -85,8 +83,7 @@ define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp { ; CHECK-LABEL: @test18_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1) ; CHECK-NEXT: ret void ; tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll @@ -25,7 +25,6 @@ define i8* @test_return_captures_2() { ; CHECK-LABEL: @test_return_captures_2( ; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: store i8 1, i8* [[M]] @@ -91,7 +90,6 @@ define i8* @test_malloc_capture_3() { ; CHECK-LABEL: @test_malloc_capture_3( ; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: store i8 1, i8* [[M]] @@ -191,7 +189,6 @@ define i8* @test_malloc_capture_7() { ; CHECK-LABEL: @test_malloc_capture_7( ; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: call void @may_throw() ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -215,10 +212,10 @@ define void @test_alloca_nocapture_1() { ; CHECK-LABEL: @test_alloca_nocapture_1( ; CHECK-NEXT: [[M:%.*]] = alloca i8 -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: +; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -240,6 +237,7 @@ ; CHECK-NEXT: call void @capture(i8* [[M]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: +; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -262,6 +260,7 @@ ; CHECK: exit: ; CHECK-NEXT: [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0 ; CHECK-NEXT: store i8* [[M]], i8** [[F_PTR]] +; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll @@ -30,6 +30,7 @@ ; CHECK-NEXT: [[C1:%.*]] = cleanuppad within none [] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: +; CHECK-NEXT: store i32 40, i32* [[SV]] ; CHECK-NEXT: ret void ; block1: diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll @@ -111,7 +111,6 @@ ; CHECK: for.body4.lr.ph: ; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]] -; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]] ; CHECK-NEXT: br label [[FOR_BODY4:%.*]] ; CHECK: for.body4: diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll @@ -32,14 +32,13 @@ define void @second_store_bigger(i32* noalias %P) { ; CHECK-LABEL: @second_store_bigger( -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[P_I64:%.*]] = bitcast i32* [[P]] to i64* +; CHECK-NEXT: [[P_I64:%.*]] = bitcast i32* [[P:%.*]] to i64* ; CHECK-NEXT: store i64 0, i64* [[P_I64]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll @@ -6,14 +6,13 @@ define void @test2(i32* noalias %P) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: ret void ; store i32 1, i32* %P @@ -53,7 +52,6 @@ define void @test7(i32* noalias %P, i32* noalias %Q) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: store i32 1, i32* [[Q:%.*]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[P:%.*]] @@ -61,7 +59,7 @@ ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[Q]] +; CHECK-NEXT: store i32 0, i32* [[Q:%.*]] ; CHECK-NEXT: store i32 0, i32* [[P]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -294,8 +294,7 @@ ; The memmove is dead, because memcpy arguments cannot overlap. define void @test38(i8* %P, i8* %Q, i8* %R) { ; CHECK-LABEL: @test38( -; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; @@ -307,8 +306,7 @@ ; The memmove is dead, because memcpy arguments cannot overlap. define void @test38_atomic(i8* %P, i8* %Q, i8* %R) { ; CHECK-LABEL: @test38_atomic( -; CHECK-NEXT: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1) ; CHECK-NEXT: ret void ; @@ -379,7 +377,9 @@ define void @test41(i32* noalias %P) { ; CHECK-LABEL: @test41( ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: store i32 2, i32* [[P]] ; CHECK-NEXT: call void @free(i8* [[P2]]) ; CHECK-NEXT: ret void ;