diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -52,6 +52,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -73,6 +74,7 @@ #include using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "dse" @@ -1391,7 +1393,7 @@ // The above MemDep based methods will eventually be removed (if they are unused // by those below). -enum class WalkType { Bad, Next }; +enum class WalkType { Bad, Next, Terminated, End }; struct WalkResult { WalkType Type; MemoryDef *Next; @@ -1399,6 +1401,22 @@ operator bool() { return Type != WalkType::Bad; } }; +Optional getLocForTerminator(Instruction *I, + const TargetLibraryInfo &TLI) { + uint64_t Len; + Value *Ptr; + if (match(I, m_Intrinsic(m_ConstantInt(Len), + m_Value(Ptr)))) + return {MemoryLocation(Ptr, Len)}; + + if (auto CS = CallSite(I)) { + if (isFreeCall(I, &TLI)) + return {MemoryLocation(CS.getArgument(0))}; + } + + return None; +} + Optional getLocForWriteEx(Instruction *I, const TargetLibraryInfo &TLI) { if (!I->mayWriteToMemory()) @@ -1492,7 +1510,6 @@ if (const IntrinsicInst *II = dyn_cast(M->getMemoryInst())) { switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::assume: @@ -1508,6 +1525,32 @@ return false; } +bool isMemTerminatorInst(Instruction *I, const TargetLibraryInfo &TLI) { + IntrinsicInst *II = dyn_cast(I); + return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) || + isFreeCall(I, &TLI); +} + +bool isMemTerminator(MemoryDef *Original, MemoryUseOrDef *M, + const TargetLibraryInfo &TLI, AliasAnalysis &AA) { + Optional MaybeTermLoc = + getLocForTerminator(M->getMemoryInst(), TLI); + + if (!MaybeTermLoc) + return false; + + MemoryLocation DefLoc = *getLocForWriteEx(Original->getMemoryInst(), TLI); + + if (isFreeCall(M->getMemoryInst(), &TLI)) { + DataLayout DL = + Original->getMemoryInst()->getParent()->getModule()->getDataLayout(); + if (auto SI = dyn_cast(Original->getMemoryInst())) + DefLoc = + MemoryLocation(GetUnderlyingObject(getStoredPointerOperand(SI), DL)); + } + return AA.isMustAlias(*MaybeTermLoc, DefLoc); +} + // Find the next MemoryDef clobbering Current, if there is any, skipping things // that do not alias. WalkType::Bad indiciates that the walk hit one of the // following: @@ -1555,6 +1598,13 @@ if (isNoopIntrinsic(cast(UseAccess))) continue; + if (isMemTerminator(Original, cast(UseAccess), TLI, AA)) { + if (NextAccess) + return {WalkType::Bad, nullptr}; + NextAccess = UseAccess; + continue; + } + // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. if (isReadClobber(OriginalLoc, cast(UseAccess), AA, TLI)) @@ -1576,6 +1626,8 @@ // NextAccess. if (NextAccess) { if (auto *NextDef = dyn_cast(NextAccess)) { + if (isMemTerminator(Original, cast(NextAccess), TLI, AA)) + return {WalkType::Terminated, NextDef}; if (isWriteClobber(OriginalLoc, cast(NextDef), AA, TLI)) { return {WalkType::Next, NextDef}; } @@ -1585,7 +1637,7 @@ PostOrderMap); } - // No aliasing definition found, bail out. + // No aliasing definition found, we reached the end of the walk. return {WalkType::Bad, nullptr}; } @@ -1623,7 +1675,7 @@ } } -// Check for any extra throws between SI and NI that block DSE. This only +// Check forany extra throws between SI and NI that block DSE. This only // checks extra maythrows (those that arn't MemoryDef's). MemoryDef that may // throw are handled during the walk from one def to the next. bool mayThrowBetween( @@ -1682,6 +1734,64 @@ return false; } +/// For all memory terminators in \p MemTerminators, traverse the users and try +/// to eliminate stores to terminated objects, until we reach the end of the +/// function or a lifetime.start. +bool eliminateStoresAfterTerminators( + SmallVectorImpl &MemTerminators, MemorySSA &MSSA, + AliasAnalysis &AA, PostDominatorTree &PDT, const TargetLibraryInfo &TLI) { + SmallPtrSet SkipStores; + InstOverlapIntervalsTy IOL; + bool Changed = false; + + for (Instruction *Terminator : MemTerminators) { + SmallVector WorkList; + auto *T = MSSA.getMemoryAccess(Terminator); + + auto PushMemUses = [&WorkList](MemoryAccess *Acc) { + for (Use &U : Acc->uses()) + WorkList.push_back(cast(U.getUser())); + }; + PushMemUses(T); + + MemoryLocation TermLoc = *getLocForTerminator(Terminator, TLI); + while (!WorkList.empty()) { + MemoryAccess *C = WorkList.back(); + WorkList.pop_back(); + if (isa(C)) + continue; + if (isa(C)) + break; + + Instruction *CInst = cast(C)->getMemoryInst(); + Optional MaybeCLoc = getLocForWriteEx(CInst, TLI); + if (!MaybeCLoc) + break; + if (AA.isNoAlias(*MaybeCLoc, TermLoc)) { + PushMemUses(C); + continue; + } + + if (match(CInst, m_Intrinsic())) + break; + + if (AA.isMustAlias(*MaybeCLoc, TermLoc) && + PDT.dominates(Terminator->getParent(), CInst->getParent())) { + Changed = true; + LLVM_DEBUG(dbgs() << "DSE: Store " << *CInst + << " is dead because it is after terminator: " + << *Terminator << "\n"); + deleteDeadInstruction(CInst, IOL, MSSA, TLI, SkipStores); + ++NumFastStores; + } + + PushMemUses(C); + } + } + + return Changed; +} + bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, @@ -1705,6 +1815,8 @@ // between them. DenseMap> ThrowingBlocks; + SmallVector MemTerminators; + // Post-order numbers for basic blocks in the function. DenseMap PostOrderNumbers; unsigned PO = 0; @@ -1718,8 +1830,12 @@ for (Instruction &I : *BB) { if (I.mayThrow() && !MSSA.getMemoryAccess(&I)) ThrowingCnt++; + BlockMap[&I] = ThrowingCnt; + if (isMemTerminatorInst(&I, TLI)) + MemTerminators.push_back(&I); + auto *MD = dyn_cast_or_null(MSSA.getMemoryAccess(&I)); if (MD && hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) Stores.push_back(MD); @@ -1761,6 +1877,24 @@ WalkResult Next; while ((Next = getNextMemoryDef(SIMD, SIDef, MemorySSAScanLimit, AA, TLI, PostOrderNumbers))) { + // We have hit the end of the walk. If the store is non-escaping, it is + // dead and we can kill it. + if (Next.Type == WalkType::Terminated) { + Instruction *TermI = cast(Next.Next)->getMemoryInst(); + if (mayThrowBetween(SI, TermI, SILocUnd, InvisibleToCaller, + ThrowingBlocks)) + break; + + MadeChange = true; + SkipStores.insert(SIMD); + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *SI + << "\n KILLER: " << *TermI << '\n'); + + deleteDeadInstruction(SIMD->getMemoryInst(), IOL, MSSA, TLI, + SkipStores); + ++NumFastStores; + break; + } MemoryAccess *NextAccess = Next.Next; LLVM_DEBUG(dbgs() << " Checking " << *NextAccess << "\n"); MemoryDef *ND = dyn_cast(NextAccess); @@ -1813,6 +1947,8 @@ } } + MadeChange |= + eliminateStoresAfterTerminators(MemTerminators, MSSA, AA, PDT, TLI); return MadeChange; } } // end anonymous namespace diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll @@ -1,4 +1,3 @@ -; XFAIL: * ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-p:64:64:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll @@ -1,4 +1,3 @@ -; XFAIL: * ; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @@ -30,7 +29,22 @@ ; CHECK: lifetime.start store i32 0, i32* %Q ;; This store is dead. ; CHECK-NOT: store - call void @llvm.lifetime.end.p0i8(i64 4, i8* %R) + call void @llvm.lifetime.end.p0i8(i64 8, i8* %R) +; CHECK: lifetime.end + ret void +} + +define void @test3(i32* %P) { +; CHECK: test3 + %Q = getelementptr i32, i32* %P, i32 1 + %R = bitcast i32* %Q to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %R) +; CHECK: lifetime.start + store i32 0, i32* %Q ;; This store is dead. + %S = getelementptr i32, i32* %P, i32 1 + %S.cast = bitcast i32* %S to i8* +; CHECK-NOT: store + call void @llvm.lifetime.end.p0i8(i64 4, i8* %S.cast) ; CHECK: lifetime.end ret void } diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -380,7 +380,6 @@ ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* ; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: store i32 2, i32* [[P]] ; CHECK-NEXT: call void @free(i8* [[P2]]) ; CHECK-NEXT: ret void ;