diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -29,7 +29,10 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -40,6 +43,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -87,6 +91,15 @@ cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE")); +static cl::opt + EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden, + cl::desc("Use the new MemorySSA-backed DSE.")); + +static cl::opt MemorySSAScanLimit( + "dse-memoryssa-scanlimit", cl::init(100), cl::Hidden, + cl::desc("The number of memory instructions to scan dead store elimination " + "(default = 100)")); + //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// @@ -1346,22 +1359,444 @@ return MadeChange; } +namespace { +//============================================================================= +// MemorySSA form of Dead store elimination. +// +// This uses MemorySSA to perform dead store elimination. MemorySSA will give +// us a graph of MemoryAccesses which will either be Defs (stores), Uses (loads) +// or Phis. The Uses will not be noalias of the Def they are a use of (unless +// we hit the memoryssa optimistaion limit). Unfortately we cannot say the same +// things of stores. So we walk forward from one store to the next, looking +// until we hit a store (or collection of stores) that cause the original to be +// dead. +// +// Because we are using MemorySSA we can also look across blocks. Anywhere where +// there are multiple possible successors we treat as a break. Otherwise, so +// long as the later store post-dominated the earlier one, the earlier one is +// dead and can be removed. +// +// There are numerous other things we need to handle along the way. +// - Any instruction that may throw usually acts a dse barrier (So long as the +// memory is non-escaping) +// - We have to be careful about stores that may also be self-reads. +// - Atomics can be optimised (especially unordered), but are often not worth +// it. We treat fences like maythrows so that the block dse in the same +// manner. +// +// The above MemDep based methods will eventually be removed (if they are unused +// by those below). + +enum class WalkType { Bad, Next }; +struct WalkResult { + WalkType Type; + MemoryDef *Next; + + operator bool() { return Type != WalkType::Bad; } +}; + +Optional getLocForWriteEx(Instruction *I, + const TargetLibraryInfo &TLI) { + if (!I->mayWriteToMemory()) + return None; + + if (auto *MTI = dyn_cast(I)) + return {MemoryLocation::getForDest(MTI)}; + + if (auto CS = CallSite(I)) { + if (Function *F = CS.getCalledFunction()) { + StringRef FnName = F->getName(); + if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat)) + return {MemoryLocation(CS.getArgument(0))}; + } + return None; + } + + return MemoryLocation::getOrNone(I); +} + +Optional getLocForReadEx(Instruction *I, + const TargetLibraryInfo &TLI) { + if (!I->mayReadFromMemory()) + return None; + + if (auto *MTI = dyn_cast(I)) + return {MemoryLocation::getForSource(MTI)}; + + if (auto CS = CallSite(I)) { + if (Function *F = CS.getCalledFunction()) { + StringRef FnName = F->getName(); + if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy)) + return {MemoryLocation(CS.getArgument(1))}; + if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy)) + return {MemoryLocation(CS.getArgument(1))}; + if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat)) + return {MemoryLocation(CS.getArgument(1))}; + if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat)) + return {MemoryLocation(CS.getArgument(1))}; + } + return None; + } + + return MemoryLocation::getOrNone(I); +} + +// Returns true if \p Use may read from \p DefLoc. +bool isReadClobber(MemoryLocation DefLoc, MemoryUseOrDef *Use, + AliasAnalysis &AA, const TargetLibraryInfo &TLI) { + Instruction *UseInst = Use->getMemoryInst(); + if (!UseInst->mayReadFromMemory()) + return false; + + if (auto CS = CallSite(UseInst)) + if (CS.onlyAccessesInaccessibleMemory()) + return false; + + auto MaybeLoc = getLocForReadEx(UseInst, TLI); + if (MaybeLoc) + return AA.alias(DefLoc, *MaybeLoc); + + return true; +} + +// Returns true if \p Use may write to \p DefLoc. +bool isWriteClobber(MemoryLocation DefLoc, MemoryUseOrDef *Use, + AliasAnalysis &AA, const TargetLibraryInfo &TLI) { + Instruction *UseInst = Use->getMemoryInst(); + if (!UseInst->mayWriteToMemory()) + return false; + + if (auto CS = CallSite(UseInst)) + if (CS.onlyAccessesInaccessibleMemory()) + return false; + + auto MaybeLoc = getLocForWriteEx(UseInst, TLI); + if (MaybeLoc) + return AA.alias(DefLoc, *MaybeLoc); + + return true; +} + +// Returns true if \p M is an intrisnic that does not read or write memory. +bool isNoopIntrinsic(MemoryUseOrDef *M) { + if (const IntrinsicInst *II = dyn_cast(M->getMemoryInst())) { + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::assume: + case Intrinsic::dbg_addr: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: + case Intrinsic::dbg_value: + return true; + default: + return false; + } + } + return false; +} + +// Find the next MemoryDef clobbering Current, if there is any, skipping things +// that do not alias. WalkType::Bad indiciates that the walk hit one of the +// following: +// * An aliasing MemoryUse (read). +// * A MemoryPHI that comes before Current. +// * Multiple MemoryDefs, which indicates a split in the control flow. +WalkResult getNextMemoryDef(MemoryAccess *Current, MemoryDef *Original, + int ScanLimit, AliasAnalysis &AA, + const TargetLibraryInfo &TLI) { + if (--ScanLimit <= 0) + return {WalkType::Bad, nullptr}; + + MemoryAccess *NextAccess = nullptr; + MemoryLocation OriginalLoc = + *getLocForWriteEx(Original->getMemoryInst(), TLI); + // Scan the uses to see if we have a single interesting MemoryAccess. + for (Use &U : Current->uses()) { + MemoryAccess *UseAccess = cast(U.getUser()); + // Bail out on MemoryPhis for now. + if (isa(UseAccess)) + return {WalkType::Bad, nullptr}; + + LLVM_DEBUG(dbgs() << " Checking use " + << *cast(UseAccess)->getMemoryInst() + << "\n"); + // We can remove the dead stores, irrespective of the fence and its ordering + // (release/acquire/seq_cst). Fences only constraints the ordering of + // already visible stores, it does not make a store visible to other + // threads. So, skipping over a fence does not change a store from being + // dead. + if (isa(cast(UseAccess)->getMemoryInst())) + continue; + + if (isNoopIntrinsic(cast(UseAccess))) + continue; + + // Uses which may read the original MemoryDef mean we cannot eliminate the + // original MD. Stop walk. + if (isReadClobber(OriginalLoc, cast(UseAccess), AA, TLI)) + return {WalkType::Bad, nullptr}; + + if (isa(UseAccess)) { + // Skip non-aliasing MemoryUses. + continue; + } + + // Multiple definitions, bail out. + if (NextAccess) + return {WalkType::Bad, nullptr}; + NextAccess = UseAccess; + } + + // If we found a next MemoryAccess, we are done if it clobbers the original + // def or it is an unrelated MemoryAccess and we continue the walk starting at + // NextAccess. + if (NextAccess) { + assert(isa(NextAccess) && "Only MemoryDefs supported for now"); + if (isWriteClobber(OriginalLoc, cast(NextAccess), AA, TLI)) + return {WalkType::Next, cast(NextAccess)}; + return getNextMemoryDef(NextAccess, Original, ScanLimit, AA, TLI); + } + + // No aliasing definition found, bail out. + return {WalkType::Bad, nullptr}; +} + +// Delete dead memory defs +void deleteDeadInstruction(Instruction *SI, InstOverlapIntervalsTy &IOL, + MemorySSA &MSSA, const TargetLibraryInfo &TLI, + SmallPtrSetImpl &SkipStores) { + MemorySSAUpdater Updater(&MSSA); + SmallVector NowDeadInsts; + NowDeadInsts.push_back(SI); + --NumFastOther; + + while (!NowDeadInsts.empty()) { + Instruction *DeadInst = NowDeadInsts.pop_back_val(); + ++NumFastOther; + + // Remove the Instruction from MSSA and IOL + if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { + Updater.removeMemoryAccess(MA); + if (MemoryDef *MD = dyn_cast(MA)) { + SkipStores.insert(MD); + } + } + IOL.erase(DeadInst); + + // Remove it's operands + for (Use &O : DeadInst->operands()) + if (Instruction *OpI = dyn_cast(O)) { + O = nullptr; + if (isInstructionTriviallyDead(OpI, &TLI)) + NowDeadInsts.push_back(OpI); + } + + DeadInst->eraseFromParent(); + } +} + +// Check for any extra throws between SI and NI that block DSE. This only +// checks extra maythrows (those that arn't MemoryDef's). MemoryDef that may +// throw are handled during the walk from one def to the next. +bool mayThrowBetween(Instruction *SI, Instruction *NI, const Value *SILocUnd, + SmallSetVector &InvisibleToCaller, + SmallPtrSetImpl &ThrowingBlocks) { + // First see if we can ignore it by using the fact that SI is an alloca/alloca + // like object that is not visible to the caller during execution of the + // function. + if (SILocUnd && InvisibleToCaller.count(SILocUnd)) + return false; + + if (SI->getParent() == NI->getParent()) + return ThrowingBlocks.find(SI->getParent()) != ThrowingBlocks.end(); + return !ThrowingBlocks.empty(); +} + +// Check if \p NI acts as a DSE barrier for \p SI. The following instructions +// act as barriers: +// * A memory instruction that may throw and \p SI accesses a non-stack object. +// * Atomic instructions stronger that monotonic. +bool isDSEBarrier(Instruction *SI, MemoryLocation &SILoc, const Value *SILocUnd, + Instruction *NI, MemoryLocation &NILoc, + SmallSetVector &InvisibleToCaller, + AliasAnalysis &AA, const TargetLibraryInfo &TLI) { + // If NI may throw it acts as a barrier, unless we are to an alloca/alloca + // like object that does not escape. + if (NI->mayThrow() && !InvisibleToCaller.count(SILocUnd)) + return true; + + if (NI->isAtomic()) { + if (auto *NSI = dyn_cast(NI)) { + if (isStrongerThanMonotonic(NSI->getOrdering())) + return true; + } else if (auto *NLI = dyn_cast(NI)) { + if (isStrongerThanMonotonic(NLI->getOrdering()) || + !AA.isNoAlias(MemoryLocation::get(NLI), SILoc)) + return true; + } else if (isa(NI)) + // We can remove the dead stores, irrespective of the fence and its + // ordering (release/acquire/seq_cst). Fences only constraints the + // ordering of already visible stores, it does not make a store visible to + // other threads. So, skipping over a fence does not change a store from + // being dead. We treat fences as maythrows, to be conservative so that + // they block optimizations in the same way + return false; + else + return true; + } + + return false; +} + +bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, + MemorySSA &MSSA, DominatorTree &DT, + PostDominatorTree &PDT, + const TargetLibraryInfo &TLI) { + const DataLayout &DL = F.getParent()->getDataLayout(); + bool MadeChange = false; + + // All MemoryDef stores + SmallVector Stores; + // Any that should be skipped as they are already deleted + SmallPtrSet SkipStores; + // A map of interval maps representing partially-overwritten value parts + InstOverlapIntervalsTy IOL; + // Keep track of all of the objects that are invisible to the caller until the + // function returns. + SmallSetVector InvisibleToCaller; + // Keep track of blocks with throwing instructions. + SmallPtrSet ThrowingBlocks; + + // Collect blocks with throwing instructions and alloc-like objects. + for (Instruction &I : instructions(F)) { + if (I.mayThrow()) + ThrowingBlocks.insert(I.getParent()); + + auto *MD = dyn_cast_or_null(MSSA.getMemoryAccess(&I)); + if (MD && hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) + Stores.push_back(MD); + + // Track alloca and alloca-like objects. Here we only care about pointers + // not escaping to the caller. For those it does not matter if throwing + // instructions are between 2 MemoryDefs, because function calls that may + // read or write memory are represented in MemorySSA. + if (isa(&I) || isAllocLikeFn(&I, &TLI)) + InvisibleToCaller.insert(&I); + } + // Treat byval or inalloca arguments the same as Allocas, stores to them are + // dead at the end of the function. + for (Argument &AI : F.args()) + if (AI.hasByValOrInAllocaAttr()) + InvisibleToCaller.insert(&AI); + + // For each store: + while (!Stores.empty()) { + MemoryDef *SIMD = Stores.back(); + Stores.pop_back(); + if (SkipStores.count(SIMD)) + continue; + Instruction *SI = SIMD->getMemoryInst(); + MemoryLocation SILoc = *getLocForWriteEx(SI, TLI); + assert(SILoc.Ptr && "SILoc should not be null"); + const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL); + + auto *SIDef = SIMD; + LLVM_DEBUG(dbgs() << "Trying to eliminate " << *SI << "\n"); + + // Walk MemorySSA forward to find a MemoryDef that kills SI. + WalkResult Next; + while ( + (Next = getNextMemoryDef(SIMD, SIDef, MemorySSAScanLimit, AA, TLI))) { + MemoryAccess *NextAccess = Next.Next; + LLVM_DEBUG(dbgs() << " Checking " << *NextAccess << "\n"); + MemoryDef *ND = dyn_cast(NextAccess); + Instruction *NI = ND->getMemoryInst(); + LLVM_DEBUG(dbgs() << " def " << *NI << "\n"); + + if (!hasAnalyzableMemoryWrite(NI, TLI)) + break; + MemoryLocation NILoc = *getLocForWriteEx(NI, TLI); + // Check for anything that looks like it will be a barrier to further + // removal + if (isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc, InvisibleToCaller, AA, + TLI)) { + LLVM_DEBUG(dbgs() << " stop, barrier\n"); + break; + } + + // We must post-dom the instructions to safely remove it + if (!PDT.dominates(ND->getBlock(), SIMD->getBlock())) { + SIMD = cast(NextAccess); + LLVM_DEBUG(dbgs() << " continue, not post-dominating!\n"); + continue; + } + + // Before we try to remove anything, check for any extra throwing + // instructions that block us from DSEing + if (mayThrowBetween(SI, NI, SILocUnd, InvisibleToCaller, + ThrowingBlocks)) { + LLVM_DEBUG(dbgs() << " stop, may throw!\n"); + break; + } + + // Get what type of overwrite this might be + int64_t InstWriteOffset, DepWriteOffset; + OverwriteResult OR = isOverwrite(NILoc, SILoc, DL, TLI, DepWriteOffset, + InstWriteOffset, SI, IOL, AA, &F); + + if (OR == OW_Complete) { + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *SI + << "\n KILLER: " << *NI << '\n'); + deleteDeadInstruction(SI, IOL, MSSA, TLI, SkipStores); + ++NumFastStores; + MadeChange = true; + break; + } + SIMD = cast(NextAccess); + } + } + + return MadeChange; +} +} // end anonymous namespace + //===----------------------------------------------------------------------===// // DSE Pass //===----------------------------------------------------------------------===// PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { - AliasAnalysis *AA = &AM.getResult(F); - DominatorTree *DT = &AM.getResult(F); - MemoryDependenceResults *MD = &AM.getResult(F); - const TargetLibraryInfo *TLI = &AM.getResult(F); + AliasAnalysis &AA = AM.getResult(F); + const TargetLibraryInfo &TLI = AM.getResult(F); + DominatorTree &DT = AM.getResult(F); + + if (EnableMemorySSA) { + MemorySSA &MSSA = AM.getResult(F).getMSSA(); + PostDominatorTree &PDT = AM.getResult(F); - if (!eliminateDeadStores(F, AA, MD, DT, TLI)) - return PreservedAnalyses::all(); + if (!eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI)) + return PreservedAnalyses::all(); + } else { + MemoryDependenceResults &MD = AM.getResult(F); + + if (!eliminateDeadStores(F, &AA, &MD, &DT, &TLI)) + return PreservedAnalyses::all(); + } PreservedAnalyses PA; PA.preserveSet(); PA.preserve(); - PA.preserve(); + if (EnableMemorySSA) + PA.preserve(); + else + PA.preserve(); return PA; } @@ -1380,25 +1815,42 @@ if (skipFunction(F)) return false; - DominatorTree *DT = &getAnalysis().getDomTree(); - AliasAnalysis *AA = &getAnalysis().getAAResults(); - MemoryDependenceResults *MD = - &getAnalysis().getMemDep(); - const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(F); + AliasAnalysis &AA = getAnalysis().getAAResults(); + DominatorTree &DT = getAnalysis().getDomTree(); + const TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); + + if (EnableMemorySSA) { + MemorySSA &MSSA = getAnalysis().getMSSA(); + PostDominatorTree &PDT = + getAnalysis().getPostDomTree(); - return eliminateDeadStores(F, AA, MD, DT, TLI); + return eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); + } else { + MemoryDependenceResults &MD = + getAnalysis().getMemDep(); + + return eliminateDeadStores(F, &AA, &MD, &DT, &TLI); + } } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + + if (EnableMemorySSA) { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } else { + AU.addRequired(); + AU.addPreserved(); + } } }; @@ -1409,8 +1861,10 @@ INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll copy from llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll copy to llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll @@ -1,51 +1,6 @@ -; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s - -; We conservative choose to prevent dead store elimination -; across release or stronger fences. It's not required -; (since the must still be a race on %addd.i), but -; it is conservatively correct. A legal optimization -; could hoist the second store above the fence, and then -; DSE one of them. -define void @test1(i32* %addr.i) { -; CHECK-LABEL: @test1 -; CHECK: store i32 5 -; CHECK: fence -; CHECK: store i32 5 -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence release - store i32 5, i32* %addr.i, align 4 - ret void -} - -; Same as previous, but with different values. If we ever optimize -; this more aggressively, this allows us to check that the correct -; store is retained (the 'i32 1' store in this case) -define void @test1b(i32* %addr.i) { -; CHECK-LABEL: @test1b -; CHECK: store i32 42 -; CHECK: fence release -; CHECK: store i32 1 -; CHECK: ret - store i32 42, i32* %addr.i, align 4 - fence release - store i32 1, i32* %addr.i, align 4 - ret void -} +; XFAIL: * -; We *could* DSE across this fence, but don't. No other thread can -; observe the order of the acquire fence and the store. -define void @test2(i32* %addr.i) { -; CHECK-LABEL: @test2 -; CHECK: store -; CHECK: fence -; CHECK: store -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence acquire - store i32 5, i32* %addr.i, align 4 - ret void -} +; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s ; We DSE stack alloc'ed and byval locations, in the presence of fences. ; Fence does not make an otherwise thread local store visible. @@ -93,4 +48,3 @@ store i32 4, i32* %P1, align 4 ret void } - diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll @@ -46,51 +46,3 @@ store i32 5, i32* %addr.i, align 4 ret void } - -; We DSE stack alloc'ed and byval locations, in the presence of fences. -; Fence does not make an otherwise thread local store visible. -; Right now the DSE in presence of fence is only done in end blocks (with no successors), -; but the same logic applies to other basic blocks as well. -; The store to %addr.i can be removed since it is a byval attribute -define void @test3(i32* byval %addr.i) { -; CHECK-LABEL: @test3 -; CHECK-NOT: store -; CHECK: fence -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence release - ret void -} - -declare void @foo(i8* nocapture %p) - -declare noalias i8* @malloc(i32) - -; DSE of stores in locations allocated through library calls. -define void @test_nocapture() { -; CHECK-LABEL: @test_nocapture -; CHECK: malloc -; CHECK: foo -; CHECK-NOT: store -; CHECK: fence - %m = call i8* @malloc(i32 24) - call void @foo(i8* %m) - store i8 4, i8* %m - fence release - ret void -} - - -; This is a full fence, but it does not make a thread local store visible. -; We can DSE the store in presence of the fence. -define void @fence_seq_cst() { -; CHECK-LABEL: @fence_seq_cst -; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: ret void - %P1 = alloca i32 - store i32 0, i32* %P1, align 4 - fence seq_cst - store i32 4, i32* %P1, align 4 - ret void -} - diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll @@ -0,0 +1,189 @@ +; XFAIL: * + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind +declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind +declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind + +; PR8701 + +;; Fully dead overwrite of memcpy. +define void @test15(i8* %P, i8* %Q) nounwind ssp { +; CHECK-LABEL: @test15( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + ret void +} + +;; Fully dead overwrite of memcpy. +define void @test15_atomic(i8* %P, i8* %Q) nounwind ssp { +; CHECK-LABEL: @test15_atomic( +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + ret void +} + +;; Fully dead overwrite of memcpy. +define void @test15_atomic_weaker(i8* %P, i8* %Q) nounwind ssp { +; CHECK-LABEL: @test15_atomic_weaker( +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + ret void +} + +;; Fully dead overwrite of memcpy. +define void @test15_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp { +; CHECK-LABEL: @test15_atomic_weaker_2( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) + ret void +} + +;; Full overwrite of smaller memcpy. +define void @test16(i8* %P, i8* %Q) nounwind ssp { +; CHECK-LABEL: @test16( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + ret void +} + +;; Full overwrite of smaller memcpy. +define void @test16_atomic(i8* %P, i8* %Q) nounwind ssp { +; CHECK-LABEL: @test16_atomic( +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1) + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + ret void +} + +;; Full overwrite of smaller memory where overwrite has stronger atomicity +define void @test16_atomic_weaker(i8* %P, i8* %Q) nounwind ssp { +; CHECK-LABEL: @test16_atomic_weaker( +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i1 false) + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + ret void +} + +;; Full overwrite of smaller memory where overwrite has weaker atomicity. +define void @test16_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp { +; CHECK-LABEL: @test16_atomic_weaker_2( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) + ret void +} + +;; Overwrite of memset by memcpy. +define void @test17(i8* %P, i8* noalias %Q) nounwind ssp { +; CHECK-LABEL: @test17( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + ret void +} + +;; Overwrite of memset by memcpy. +define void @test17_atomic(i8* %P, i8* noalias %Q) nounwind ssp { +; CHECK-LABEL: @test17_atomic( +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1) + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + ret void +} + +;; Overwrite of memset by memcpy. Overwrite is stronger atomicity. We can +;; remove the memset. +define void @test17_atomic_weaker(i8* %P, i8* noalias %Q) nounwind ssp { +; CHECK-LABEL: @test17_atomic_weaker( +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i1 false) + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + ret void +} + +;; Overwrite of memset by memcpy. Overwrite is weaker atomicity. We can remove +;; the memset. +define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp { +; CHECK-LABEL: @test17_atomic_weaker_2( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) + ret void +} + +; Should not delete the volatile memset. +define void @test17v(i8* %P, i8* %Q) nounwind ssp { +; CHECK-LABEL: @test17v( +; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 42, i64 8, i1 true) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + ret void +} + +; PR8728 +; Do not delete instruction where possible situation is: +; A = B +; A = A +; +; NB! See PR11763 - currently LLVM allows memcpy's source and destination to be +; equal (but not inequal and overlapping). +define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { +; CHECK-LABEL: @test18( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) + ret void +} + +define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp { +; CHECK-LABEL: @test18_atomic( +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1) + ret void +} + diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-lifetimes.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-lifetimes.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-lifetimes.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i64 } +%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* } +%struct.Patient = type { i32, i32, i32, %struct.Village* } +%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List } + +declare %struct.Village* @alloc(%struct.Village*) + +define i8* @alloc_tree() { +; CHECK-LABEL: @alloc_tree( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[FVAL:%.*]] = alloca [4 x %struct.Village*], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x %struct.Village*]* [[FVAL]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull [[TMP0]]) +; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(192) i8* @malloc(i64 192) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[CALL]] to %struct.Village* +; CHECK-NEXT: [[CALL3:%.*]] = tail call %struct.Village* @alloc(%struct.Village* [[TMP1]]) +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.Village*], [4 x %struct.Village*]* [[FVAL]], i64 0, i64 3 +; CHECK-NEXT: store %struct.Village* [[CALL3]], %struct.Village** [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[CALL3_1:%.*]] = tail call %struct.Village* @alloc(%struct.Village* [[TMP1]]) +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [4 x %struct.Village*], [4 x %struct.Village*]* [[FVAL]], i64 0, i64 2 +; CHECK-NEXT: store %struct.Village* [[CALL3_1]], %struct.Village** [[ARRAYIDX_1]], align 16 +; CHECK-NEXT: [[CALL3_2:%.*]] = tail call %struct.Village* @alloc(%struct.Village* [[TMP1]]) +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [4 x %struct.Village*], [4 x %struct.Village*]* [[FVAL]], i64 0, i64 1 +; CHECK-NEXT: store %struct.Village* [[CALL3_2]], %struct.Village** [[ARRAYIDX_2]], align 8 +; CHECK-NEXT: [[CALL3_3:%.*]] = tail call %struct.Village* @alloc(%struct.Village* [[TMP1]]) +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [4 x %struct.Village*], [4 x %struct.Village*]* [[FVAL]], i64 0, i64 0 +; CHECK-NEXT: store %struct.Village* [[CALL3_3]], %struct.Village** [[ARRAYIDX_3]], align 16 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(32) [[CALL]], i8* nonnull align 16 dereferenceable(32) [[TMP0]], i64 32, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull [[TMP0]]) +; CHECK-NEXT: ret i8* [[CALL]] +; +entry: + %fval = alloca [4 x %struct.Village*], align 16 + %0 = bitcast [4 x %struct.Village*]* %fval to i8* + call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %0) #7 + %call = tail call dereferenceable_or_null(192) i8* @malloc(i64 192) #8 + %1 = bitcast i8* %call to %struct.Village* + %call3 = tail call %struct.Village* @alloc(%struct.Village* %1) + %arrayidx = getelementptr inbounds [4 x %struct.Village*], [4 x %struct.Village*]* %fval, i64 0, i64 3 + store %struct.Village* %call3, %struct.Village** %arrayidx, align 8 + %call3.1 = tail call %struct.Village* @alloc(%struct.Village* %1) + %arrayidx.1 = getelementptr inbounds [4 x %struct.Village*], [4 x %struct.Village*]* %fval, i64 0, i64 2 + store %struct.Village* %call3.1, %struct.Village** %arrayidx.1, align 16 + %call3.2 = tail call %struct.Village* @alloc(%struct.Village* %1) + %arrayidx.2 = getelementptr inbounds [4 x %struct.Village*], [4 x %struct.Village*]* %fval, i64 0, i64 1 + store %struct.Village* %call3.2, %struct.Village** %arrayidx.2, align 8 + %call3.3 = tail call %struct.Village* @alloc(%struct.Village* %1) + %arrayidx.3 = getelementptr inbounds [4 x %struct.Village*], [4 x %struct.Village*]* %fval, i64 0, i64 0 + store %struct.Village* %call3.3, %struct.Village** %arrayidx.3, align 16 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(32) %call, i8* nonnull align 16 dereferenceable(32) %0, i64 32, i1 false) + call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %0) #7 + ret i8* %call +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +declare noalias i8* @malloc(i64) +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck --check-prefix=NO-LIMIT %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=3 -S | FileCheck --check-prefix=LIMIT-3 %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=4 -S | FileCheck --check-prefix=LIMIT-4 %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + + +define void @test2(i32* noalias %P, i32* noalias %Q, i32* noalias %R) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; +; NO-LIMIT-LABEL: @test2( +; NO-LIMIT-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; NO-LIMIT: bb1: +; NO-LIMIT-NEXT: br label [[BB3:%.*]] +; NO-LIMIT: bb2: +; NO-LIMIT-NEXT: br label [[BB3]] +; NO-LIMIT: bb3: +; NO-LIMIT-NEXT: store i32 0, i32* [[Q:%.*]] +; NO-LIMIT-NEXT: store i32 0, i32* [[R:%.*]] +; NO-LIMIT-NEXT: store i32 0, i32* [[P:%.*]] +; NO-LIMIT-NEXT: ret void +; +; LIMIT-0-LABEL: @test2( +; LIMIT-0-NEXT: store i32 1, i32* [[P:%.*]] +; LIMIT-0-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-0: bb1: +; LIMIT-0-NEXT: br label [[BB3:%.*]] +; LIMIT-0: bb2: +; LIMIT-0-NEXT: br label [[BB3]] +; LIMIT-0: bb3: +; LIMIT-0-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-0-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-0-NEXT: store i32 0, i32* [[P]] +; LIMIT-0-NEXT: ret void +; +; LIMIT-3-LABEL: @test2( +; LIMIT-3-NEXT: store i32 1, i32* [[P:%.*]] +; LIMIT-3-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-3: bb1: +; LIMIT-3-NEXT: br label [[BB3:%.*]] +; LIMIT-3: bb2: +; LIMIT-3-NEXT: br label [[BB3]] +; LIMIT-3: bb3: +; LIMIT-3-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-3-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-3-NEXT: store i32 0, i32* [[P]] +; LIMIT-3-NEXT: ret void +; +; LIMIT-4-LABEL: @test2( +; LIMIT-4-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-4: bb1: +; LIMIT-4-NEXT: br label [[BB3:%.*]] +; LIMIT-4: bb2: +; LIMIT-4-NEXT: br label [[BB3]] +; LIMIT-4: bb3: +; LIMIT-4-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-4-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-4-NEXT: store i32 0, i32* [[P:%.*]] +; LIMIT-4-NEXT: ret void +; +; LIMIT-2-LABEL: @test2( +; LIMIT-2-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-2: bb1: +; LIMIT-2-NEXT: br label [[BB3:%.*]] +; LIMIT-2: bb2: +; LIMIT-2-NEXT: br label [[BB3]] +; LIMIT-2: bb3: +; LIMIT-2-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-2-NEXT: store i32 0, i32* [[P:%.*]] +; LIMIT-2-NEXT: ret void + store i32 1, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + store i32 0, i32* %Q + store i32 0, i32* %R + store i32 0, i32* %P + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind +declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind +declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind +declare void @llvm.init.trampoline(i8*, i8*, i8*) + + +;; Overwrite of memset by memcpy. +define void @test17(i8* %P, i8* noalias %Q) nounwind ssp { +; CHECK-LABEL: @test17( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + ret void +} + +;; Overwrite of memset by memcpy. +define void @test17_atomic(i8* %P, i8* noalias %Q) nounwind ssp { +; CHECK-LABEL: @test17_atomic( +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1) + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + ret void +} + +;; Overwrite of memset by memcpy. Overwrite is stronger atomicity. We can +;; remove the memset. +define void @test17_atomic_weaker(i8* %P, i8* noalias %Q) nounwind ssp { +; CHECK-LABEL: @test17_atomic_weaker( +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i1 false) + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + ret void +} + +;; Overwrite of memset by memcpy. Overwrite is weaker atomicity. We can remove +;; the memset. +define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp { +; CHECK-LABEL: @test17_atomic_weaker_2( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) + ret void +} + +; Should not delete the volatile memset. +define void @test17v(i8* %P, i8* %Q) nounwind ssp { +; CHECK-LABEL: @test17v( +; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 42, i64 8, i1 true) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + ret void +} + +; PR8728 +; Do not delete instruction where possible situation is: +; A = B +; A = A +; +; NB! See PR11763 - currently LLVM allows memcpy's source and destination to be +; equal (but not inequal and overlapping). +define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { +; CHECK-LABEL: @test18( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) + ret void +} + +define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp { +; CHECK-LABEL: @test18_atomic( +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) + tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1) + ret void +} + + + diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll @@ -0,0 +1,174 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +declare noalias i8* @malloc(i64) + +declare void @foo() +declare void @capture(i8*) + +; Check that we do not remove the second store, as %m is returned. +define i8* @test_return_captures_1() { +; CHECK-LABEL: @test_return_captures_1( +; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) +; CHECK-NEXT: store i8 1, i8* [[M]] +; CHECK-NEXT: ret i8* [[M]] +; + %m = call i8* @malloc(i64 24) + store i8 0, i8* %m + store i8 1, i8* %m + ret i8* %m +} + +; Same as @test_return_captures_1, but across BBs. +define i8* @test_return_captures_2() { +; CHECK-LABEL: @test_return_captures_2( +; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: store i8 1, i8* [[M]] +; CHECK-NEXT: ret i8* [[M]] +; + %m = call i8* @malloc(i64 24) + store i8 0, i8* %m + br label %exit + +exit: + store i8 1, i8* %m + ret i8* %m +} + +; Check we do not eliminate either store. The first one cannot be eliminated, +; due to the call of @capture. The second one because %m escapes. +define i8* @test_malloc_capture_1() { +; CHECK-LABEL: @test_malloc_capture_1( +; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) +; CHECK-NEXT: store i8 0, i8* [[M]] +; CHECK-NEXT: call void @capture(i8* [[M]]) +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: store i8 1, i8* [[M]] +; CHECK-NEXT: ret i8* [[M]] +; + %m = call i8* @malloc(i64 24) + store i8 0, i8* %m + call void @capture(i8* %m) + br label %exit + +exit: + store i8 1, i8* %m + ret i8* %m +} + +; We can remove the first store store i8 0, i8* %m because %m escapes after the +; killing store. +define i8* @tesh_malloc_capture_2() { +; CHECK-LABEL: @tesh_malloc_capture_2( +; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: store i8 1, i8* [[M]] +; CHECK-NEXT: call void @capture(i8* [[M]]) +; CHECK-NEXT: ret i8* [[M]] +; + %m = call i8* @malloc(i64 24) + store i8 0, i8* %m + br label %exit + +exit: + store i8 1, i8* %m + call void @capture(i8* %m) + ret i8* %m +} + +; TODO: Remove store in exit. +; Stores to stack objects can be eliminated if they are not captured inside the function. +define void @test_alloca_nocapture_1() { +; CHECK-LABEL: @test_alloca_nocapture_1( +; CHECK-NEXT: [[M:%.*]] = alloca i8 +; CHECK-NEXT: store i8 0, i8* [[M]] +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: store i8 1, i8* [[M]] +; CHECK-NEXT: ret void +; + %m = alloca i8 + store i8 0, i8* %m + call void @foo() + br label %exit + +exit: + store i8 1, i8* %m + ret void +} + +; TODO: Remove store in exit. +; Cannot remove first store i8 0, i8* %m, as the call to @capture captures the object. +define void @test_alloca_capture_1() { +; CHECK-LABEL: @test_alloca_capture_1( +; CHECK-NEXT: [[M:%.*]] = alloca i8 +; CHECK-NEXT: store i8 0, i8* [[M]] +; CHECK-NEXT: call void @capture(i8* [[M]]) +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: store i8 1, i8* [[M]] +; CHECK-NEXT: ret void +; + %m = alloca i8 + store i8 0, i8* %m + call void @capture(i8* %m) + br label %exit + +exit: + store i8 1, i8* %m + ret void +} + + +%S1 = type { i8 * } + +; TODO: Remove store at exit. +; We can remove the last store to %m, even though it escapes because the alloca +; becomes invalid after the function returns. +define void @test_alloca_capture_2(%S1* %E) { +; CHECK-LABEL: @test_alloca_capture_2( +; CHECK-NEXT: [[M:%.*]] = alloca i8 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0 +; CHECK-NEXT: store i8* [[M]], i8** [[F_PTR]] +; CHECK-NEXT: store i8 1, i8* [[M]] +; CHECK-NEXT: ret void +; + %m = alloca i8 + br label %exit + +exit: + %f.ptr = getelementptr %S1, %S1* %E, i32 0, i32 0 + store i8* %m, i8** %f.ptr + store i8 1, i8* %m + ret void +} + +; We cannot remove the last store to %m, because it escapes by storing it to %E. +define void @test_malloc_capture_2(%S1* %E) { +; CHECK-LABEL: @test_malloc_capture_2( +; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0 +; CHECK-NEXT: store i8* [[M]], i8** [[F_PTR]] +; CHECK-NEXT: store i8 1, i8* [[M]] +; CHECK-NEXT: ret void +; + %m = call i8* @malloc(i64 24) + br label %exit + +exit: + %f.ptr = getelementptr %S1, %S1* %E, i32 0, i32 0 + store i8* %m, i8** %f.ptr + store i8 1, i8* %m + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +declare void @f() +declare i32 @__CxxFrameHandler3(...) + + +; Make sure we do not eliminate `store i32 20, i32* %sv`. Even though it is a store +; to a stack object, we can read it in the landing/catchpad. +define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 { +; CHECK-LABEL: @test12( +; CHECK-NEXT: block1: +; CHECK-NEXT: [[SV:%.*]] = alloca i32 +; CHECK-NEXT: br label [[BLOCK2:%.*]] +; CHECK: block2: +; CHECK-NEXT: store i32 20, i32* [[SV]] +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label [[BLOCK3:%.*]] unwind label [[CATCH_DISPATCH:%.*]] +; CHECK: block3: +; CHECK-NEXT: store i32 30, i32* [[SV]] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: catch.dispatch: +; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label %catch] unwind label [[CLEANUP:%.*]] +; CHECK: catch: +; CHECK-NEXT: [[C:%.*]] = catchpad within [[CS1]] [] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[SV]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: cleanup: +; CHECK-NEXT: [[C1:%.*]] = cleanuppad within none [] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: store i32 40, i32* [[SV]] +; CHECK-NEXT: ret void +; +block1: + %sv = alloca i32 + br label %block2 + +block2: + store i32 20, i32* %sv + invoke void @f() + to label %block3 unwind label %catch.dispatch + +block3: + store i32 30, i32* %sv + br label %exit + +catch.dispatch: + %cs1 = catchswitch within none [label %catch] unwind label %cleanup + +catch: + %c = catchpad within %cs1 [] + %lv = load i32, i32* %sv + br label %exit + +cleanup: + %c1 = cleanuppad within none [] + br label %exit + +exit: + store i32 40, i32* %sv + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll @@ -0,0 +1,171 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +define void @test13(i32* noalias %P) { +; CHECK-LABEL: @test13( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR:%.*]] +; CHECK: for: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + br label %for +for: + store i32 0, i32* %P + br i1 false, label %for, label %end +end: + ret void +} + + +define void @test14(i32* noalias %P) { +; CHECK-LABEL: @test14( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 1, i32* [[P:%.*]] +; CHECK-NEXT: br label [[FOR:%.*]] +; CHECK: for: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + store i32 1, i32* %P + br label %for +for: + store i32 0, i32* %P + br i1 false, label %for, label %end +end: + ret void +} + +define void @test18(i32* noalias %P) { +; CHECK-LABEL: @test18( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: br label [[FOR:%.*]] +; CHECK: for: +; CHECK-NEXT: store i8 1, i8* [[P2]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: store i8 2, i8* [[P2]] +; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %P2 = bitcast i32* %P to i8* + store i32 0, i32* %P + br label %for +for: + store i8 1, i8* %P2 + %x = load i32, i32* %P + store i8 2, i8* %P2 + br i1 false, label %for, label %end +end: + ret void +} + +define void @test21(i32* noalias %P) { +; CHECK-LABEL: @test21( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) +; CHECK-NEXT: br label [[FOR:%.*]] +; CHECK: for: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + %p3 = bitcast i32* %arrayidx0 to i8* + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) + br label %for +for: + %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 1, i32* %arrayidx1, align 4 + br i1 false, label %for, label %end +end: + ret void +} + +define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %x, i32* noalias nocapture %b) local_unnamed_addr { +; CHECK-LABEL: @test_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP27]], label [[FOR_BODY4_LR_PH_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body4.lr.ph.preheader: +; CHECK-NEXT: br label [[FOR_BODY4_LR_PH:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body4.lr.ph: +; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]] +; CHECK-NEXT: br label [[FOR_BODY4:%.*]] +; CHECK: for.body4: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 0, [[FOR_BODY4_LR_PH]] ], [ [[ADD9:%.*]], [[FOR_BODY4]] ] +; CHECK-NEXT: [[J_026:%.*]] = phi i32 [ 0, [[FOR_BODY4_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY4]] ] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[J_026]], [[MUL]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[ADD]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[J_026]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +; CHECK-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[ADD9]] = add nsw i32 [[MUL7]], [[TMP0]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[J_026]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]] +; CHECK: for.cond.cleanup3: +; CHECK-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC11]] = add nuw nsw i32 [[I_028]], 1 +; CHECK-NEXT: [[EXITCOND29:%.*]] = icmp eq i32 [[INC11]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND29]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_LR_PH]] +; +entry: + %cmp27 = icmp sgt i32 %N, 0 + br i1 %cmp27, label %for.body4.lr.ph.preheader, label %for.cond.cleanup + +for.body4.lr.ph.preheader: ; preds = %entry + br label %for.body4.lr.ph + +for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry + ret void + +for.body4.lr.ph: ; preds = %for.body4.lr.ph.preheader, %for.cond.cleanup3 + %i.028 = phi i32 [ %inc11, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.028 + store i32 0, i32* %arrayidx, align 4 + %mul = mul nsw i32 %i.028, %N + br label %for.body4 + +for.body4: ; preds = %for.body4, %for.body4.lr.ph + %0 = phi i32 [ 0, %for.body4.lr.ph ], [ %add9, %for.body4 ] + %j.026 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ] + %add = add nsw i32 %j.026, %mul + %arrayidx5 = getelementptr inbounds i32, i32* %A, i32 %add + %1 = load i32, i32* %arrayidx5, align 4 + %arrayidx6 = getelementptr inbounds i32, i32* %x, i32 %j.026 + %2 = load i32, i32* %arrayidx6, align 4 + %mul7 = mul nsw i32 %2, %1 + %add9 = add nsw i32 %mul7, %0 + %inc = add nuw nsw i32 %j.026, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup3, label %for.body4 + +for.cond.cleanup3: ; preds = %for.body4 + store i32 %add9, i32* %arrayidx, align 4 + %inc11 = add nuw nsw i32 %i.028, 1 + %exitcond29 = icmp eq i32 %inc11, %N + br i1 %exitcond29, label %for.cond.cleanup, label %for.body4.lr.ph +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -0,0 +1,291 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py + +; XFAIL: * +; TODO: Handling of free not implemented yet. + +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +declare void @unknown_func() +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare noalias i8* @malloc(i32) +declare void @free(i8* nocapture) + +define void @test16(i32* noalias %P) { +; CHECK-LABEL: @test16( +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: call void @free(i8* [[P2]]) +; CHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + store i32 1, i32* %P + br i1 true, label %bb1, label %bb3 +bb1: + store i32 1, i32* %P + br label %bb3 +bb3: + call void @free(i8* %P2) + ret void +} + + +define void @test17(i32* noalias %P) { +; CHECK-LABEL: @test17( +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 1, i32* [[P]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: call void @free(i8* [[P2]]) +; CHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + store i32 1, i32* %P + br i1 true, label %bb1, label %bb3 +bb1: + call void @unknown_func() + store i32 1, i32* %P + br label %bb3 +bb3: + call void @free(i8* %P2) + ret void +} + +define void @test6(i32* noalias %P) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret void +; + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + call void @unknown_func() + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + +define void @test19(i32* noalias %P) { +; CHECK-LABEL: @test19( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + %p3 = bitcast i32* %arrayidx0 to i8* + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 1, i32* %arrayidx1, align 4 + br label %bb3 +bb3: + ret void +} + + +define void @test20(i32* noalias %P) { +; CHECK-LABEL: @test20( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: ret void +; +entry: + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + %p3 = bitcast i32* %arrayidx0 to i8* + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 1, i32* %arrayidx1, align 4 + ret void +} + + +define i32 @test22(i32* %P, i32* noalias %Q, i32* %R) { +; CHECK-LABEL: @test22( +; CHECK-NEXT: store i32 2, i32* [[P:%.*]] +; CHECK-NEXT: store i32 3, i32* [[Q:%.*]] +; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[R:%.*]] +; CHECK-NEXT: ret i32 [[L]] +; + store i32 1, i32* %Q + store i32 2, i32* %P + store i32 3, i32* %Q + %l = load i32, i32* %R + ret i32 %l +} + + +define void @test23(i32* noalias %P) { +; CHECK-LABEL: @test23( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + br i1 true, label %bb1, label %bb2 +bb1: + store i32 0, i32* %P + br label %bb3 +bb2: + call void @unknown_func() + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + + +define void @test24(i32* noalias %P) { +; CHECK-LABEL: @test24( +; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + br i1 true, label %bb2, label %bb1 +bb1: + store i32 0, i32* %P + br label %bb3 +bb2: + call void @unknown_func() + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + +define i8* @test26() { +; CHECK-LABEL: @test26( +; CHECK-NEXT: bb1: +; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[M:%.*]] = call noalias i8* @malloc(i32 10) +; CHECK-NEXT: store i8 1, i8* [[M]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[R:%.*]] = phi i8* [ null, [[BB1:%.*]] ], [ [[M]], [[BB2]] ] +; CHECK-NEXT: ret i8* [[R]] +; +bb1: + br i1 true, label %bb2, label %bb3 +bb2: + %m = call noalias i8* @malloc(i32 10) + store i8 1, i8* %m + br label %bb3 +bb3: + %r = phi i8* [ null, %bb1 ], [ %m, %bb2 ] + ret i8* %r +} + + +define void @test27() { +; CHECK-LABEL: @test27( +; CHECK-NEXT: bb1: +; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[M:%.*]] = call noalias i8* @malloc(i32 10) +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[R:%.*]] = phi i8* [ null, [[BB1:%.*]] ], [ [[M]], [[BB2]] ] +; CHECK-NEXT: ret void +; +bb1: + br i1 true, label %bb2, label %bb3 +bb2: + %m = call noalias i8* @malloc(i32 10) + store i8 1, i8* %m + br label %bb3 +bb3: + %r = phi i8* [ null, %bb1 ], [ %m, %bb2 ] + ret void +} + + +define i8* @test28() { +; CHECK-LABEL: @test28( +; CHECK-NEXT: bb0: +; CHECK-NEXT: [[M:%.*]] = call noalias i8* @malloc(i32 10) +; CHECK-NEXT: [[MC0:%.*]] = bitcast i8* [[M]] to i8* +; CHECK-NEXT: [[MC1:%.*]] = bitcast i8* [[MC0]] to i8* +; CHECK-NEXT: [[MC2:%.*]] = bitcast i8* [[MC1]] to i8* +; CHECK-NEXT: [[MC3:%.*]] = bitcast i8* [[MC2]] to i8* +; CHECK-NEXT: [[MC4:%.*]] = bitcast i8* [[MC3]] to i8* +; CHECK-NEXT: [[MC5:%.*]] = bitcast i8* [[MC4]] to i8* +; CHECK-NEXT: [[MC6:%.*]] = bitcast i8* [[MC5]] to i8* +; CHECK-NEXT: [[M0:%.*]] = bitcast i8* [[MC6]] to i8* +; CHECK-NEXT: store i8 2, i8* [[M]] +; CHECK-NEXT: ret i8* [[M0]] +; +bb0: + %m = call noalias i8* @malloc(i32 10) + %mc0 = bitcast i8* %m to i8* + %mc1 = bitcast i8* %mc0 to i8* + %mc2 = bitcast i8* %mc1 to i8* + %mc3 = bitcast i8* %mc2 to i8* + %mc4 = bitcast i8* %mc3 to i8* + %mc5 = bitcast i8* %mc4 to i8* + %mc6 = bitcast i8* %mc5 to i8* + %m0 = bitcast i8* %mc6 to i8* + store i8 2, i8* %m + ret i8* %m0 +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +declare void @unknown_func() +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +define void @test19(i32* noalias %P) { +; CHECK-LABEL: @test19( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + %p3 = bitcast i32* %arrayidx0 to i8* + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 1, i32* %arrayidx1, align 4 + br label %bb3 +bb3: + ret void +} + + +define void @test20(i32* noalias %P) { +; CHECK-LABEL: @test20( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: ret void +; +entry: + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + %p3 = bitcast i32* %arrayidx0 to i8* + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 1, i32* %arrayidx1, align 4 + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + + +define void @test4(i32* noalias %P) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret void +; + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + %x = load i32, i32* %P + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + +define void @test5(i32* noalias %P) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 1, i32* [[P:%.*]] +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i32 1, i32* [[P]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret void +; + br i1 true, label %bb1, label %bb2 +bb1: + store i32 1, i32* %P + br label %bb3 +bb2: + store i32 1, i32* %P + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + +define void @test8(i32* %P, i32* %Q) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 1, i32* [[P:%.*]] +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i32 1, i32* [[Q:%.*]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret void +; + br i1 true, label %bb1, label %bb2 +bb1: + store i32 1, i32* %P + br label %bb3 +bb2: + store i32 1, i32* %Q + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + +define void @test10(i32* noalias %P) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i8 1, i8* [[P2]] +; CHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + store i8 1, i8* %P2 + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + + +define void @test2(i32* noalias %P) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + store i32 1, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + +define void @test3(i32* noalias %P) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + store i32 0, i32* %P + br label %bb3 +bb3: + ret void +} + + +define void @test7(i32* noalias %P, i32* noalias %Q) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[Q:%.*]] +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret void +; + store i32 1, i32* %Q + br i1 true, label %bb1, label %bb2 +bb1: + load i32, i32* %P + br label %bb3 +bb2: + br label %bb3 +bb3: + store i32 0, i32* %Q + store i32 0, i32* %P + ret void +} + +define i32 @test22(i32* %P, i32* noalias %Q, i32* %R) { +; CHECK-LABEL: @test22( +; CHECK-NEXT: store i32 2, i32* [[P:%.*]] +; CHECK-NEXT: store i32 3, i32* [[Q:%.*]] +; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[R:%.*]] +; CHECK-NEXT: ret i32 [[L]] +; + store i32 1, i32* %Q + store i32 2, i32* %P + store i32 3, i32* %Q + %l = load i32, i32* %R + ret i32 %l +} + +define void @test9(i32* noalias %P) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: ret void +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret void +; + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + ret void +bb3: + store i32 0, i32* %P + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; XFAIL: * +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +declare void @unknown_func() + +define void @test6(i32* noalias %P) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret void +; + store i32 0, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + call void @unknown_func() + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + +define i32 @test22(i32* %P, i32* noalias %Q, i32* %R) { +; CHECK-LABEL: @test22( +; CHECK-NEXT: store i32 2, i32* [[P:%.*]] +; CHECK-NEXT: store i32 3, i32* [[Q:%.*]] +; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[R:%.*]] +; CHECK-NEXT: ret i32 [[L]] +; + store i32 1, i32* %Q + store i32 2, i32* %P + store i32 3, i32* %Q + %l = load i32, i32* %R + ret i32 %l +} + + +define void @test23(i32* noalias %P) { +; CHECK-LABEL: @test23( +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + br i1 true, label %bb1, label %bb2 +bb1: + store i32 0, i32* %P + br label %bb3 +bb2: + call void @unknown_func() + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} + + +define void @test24(i32* noalias %P) { +; CHECK-LABEL: @test24( +; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: ret void +; + br i1 true, label %bb2, label %bb1 +bb1: + store i32 0, i32* %P + br label %bb3 +bb2: + call void @unknown_func() + br label %bb3 +bb3: + store i32 0, i32* %P + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -68,185 +68,6 @@ declare noalias i8* @malloc(i32) declare noalias i8* @calloc(i32, i32) - -; PR8701 - -;; Fully dead overwrite of memcpy. -define void @test15(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test15( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - ret void -} - -;; Fully dead overwrite of memcpy. -define void @test15_atomic(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test15_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Fully dead overwrite of memcpy. -define void @test15_atomic_weaker(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test15_atomic_weaker( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Fully dead overwrite of memcpy. -define void @test15_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test15_atomic_weaker_2( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) - ret void -} - -;; Full overwrite of smaller memcpy. -define void @test16(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test16( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - ret void -} - -;; Full overwrite of smaller memcpy. -define void @test16_atomic(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test16_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Full overwrite of smaller memory where overwrite has stronger atomicity -define void @test16_atomic_weaker(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test16_atomic_weaker( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i1 false) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Full overwrite of smaller memory where overwrite has weaker atomicity. -define void @test16_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test16_atomic_weaker_2( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) - ret void -} - -;; Overwrite of memset by memcpy. -define void @test17(i8* %P, i8* noalias %Q) nounwind ssp { -; CHECK-LABEL: @test17( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - ret void -} - -;; Overwrite of memset by memcpy. -define void @test17_atomic(i8* %P, i8* noalias %Q) nounwind ssp { -; CHECK-LABEL: @test17_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Overwrite of memset by memcpy. Overwrite is stronger atomicity. We can -;; remove the memset. -define void @test17_atomic_weaker(i8* %P, i8* noalias %Q) nounwind ssp { -; CHECK-LABEL: @test17_atomic_weaker( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i1 false) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - ret void -} - -;; Overwrite of memset by memcpy. Overwrite is weaker atomicity. We can remove -;; the memset. -define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp { -; CHECK-LABEL: @test17_atomic_weaker_2( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false) - ret void -} - -; Should not delete the volatile memset. -define void @test17v(i8* %P, i8* %Q) nounwind ssp { -; CHECK-LABEL: @test17v( -; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 42, i64 8, i1 true) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - ret void -} - -; PR8728 -; Do not delete instruction where possible situation is: -; A = B -; A = A -; -; NB! See PR11763 - currently LLVM allows memcpy's source and destination to be -; equal (but not inequal and overlapping). -define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { -; CHECK-LABEL: @test18( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) - ret void -} - -define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp { -; CHECK-LABEL: @test18_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) -; CHECK-NEXT: ret void -; - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) - tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1) - ret void -} - - ; The store here is not dead because the byval call reads it. declare void @test19f({i32}* byval align 4 %P)