diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -52,6 +52,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -73,6 +74,7 @@ #include using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "dse" @@ -1495,7 +1497,8 @@ auto *MD = dyn_cast_or_null(MSSA.getMemoryAccess(&I)); if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit && - hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) + ((hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) || + State.isMemTerminatorInst(&I))) State.MemDefs.push_back(MD); // Track alloca and alloca-like objects. Here we care about objects not @@ -1563,12 +1566,50 @@ return false; ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); - Optional UseLoc = getLocForWriteEx(UseInst); - return isModSet(MR) && isMustSet(MR) && - UseLoc->Size.getValue() >= DefLoc.Size.getValue(); + return isModSet(MR) && isMustSet(MR); } - /// Returns true if \p Use may read from \p DefLoc. + /// Returns the MemoryLocation terminated by \p I, if \p I is a memory + /// terminator like llvm.lifetime.end or free. + Optional getLocForTerminator(Instruction *I) const { + uint64_t Len; + Value *Ptr; + if (match(I, m_Intrinsic(m_ConstantInt(Len), + m_Value(Ptr)))) + return {MemoryLocation(Ptr, Len)}; + + if (auto CS = CallSite(I)) { + if (isFreeCall(I, &TLI)) + return {MemoryLocation(CS.getArgument(0))}; + } + + return None; + } + + /// Returns true if \p I is a memory terminator instruction like + /// llvm.lifetime.end or free. + bool isMemTerminatorInst(Instruction *I) const { + IntrinsicInst *II = dyn_cast(I); + return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) || + isFreeCall(I, &TLI); + } + + /// Returns true if \p MaybeTerm is a memory terminator for the same + /// underlying object as \p DefLoc. + bool isMemTerminator(MemoryLocation DefLoc, Instruction *MaybeTerm) const { + Optional MaybeTermLoc = getLocForTerminator(MaybeTerm); + + if (!MaybeTermLoc) + return false; + + if (isFreeCall(MaybeTerm, &TLI)) { + DataLayout DL = MaybeTerm->getParent()->getModule()->getDataLayout(); + DefLoc = MemoryLocation(GetUnderlyingObject(DefLoc.Ptr, DL)); + } + return AA.isMustAlias(*MaybeTermLoc, DefLoc); + } + + // Returns true if \p Use may read from \p DefLoc. bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const { if (!UseInst->mayReadFromMemory()) return false; @@ -1674,6 +1715,11 @@ continue; } + // A memory terminator kills all preceeding MemoryDefs and all succeeding + // MemoryAccesses. We do not have to check it's users. + if (isMemTerminator(DefLoc, UseInst)) + continue; + // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. if (isReadClobber(DefLoc, UseInst)) { @@ -1808,7 +1854,12 @@ if (State.SkipStores.count(KillingDef)) continue; Instruction *SI = KillingDef->getMemoryInst(); - auto MaybeSILoc = State.getLocForWriteEx(SI); + Optional MaybeSILoc; + if (State.isMemTerminatorInst(SI)) + MaybeSILoc = State.getLocForTerminator(SI); + else + MaybeSILoc = State.getLocForWriteEx(SI); + if (!MaybeSILoc) { LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " << *SI << "\n"); @@ -1890,22 +1941,33 @@ if (!DebugCounter::shouldExecute(MemorySSACounter)) break; - // Check if NI overwrites SI. - int64_t InstWriteOffset, DepWriteOffset; - auto Iter = State.IOLs.insert( - std::make_pair( - NI->getParent(), InstOverlapIntervalsTy())); - auto &IOL = Iter.first->second; - OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, - InstWriteOffset, NI, IOL, AA, &F); - ToCheck.insert(NextDef->getDefiningAccess()); - if (OR == OW_Complete) { + if (State.isMemTerminatorInst(SI)) { + const Value *NIUnd = GetUnderlyingObject(NILoc.Ptr, DL); + if (!SILocUnd || SILocUnd != NIUnd) + continue; LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI << "\n KILLER: " << *SI << '\n'); State.deleteDeadInstruction(NI); ++NumFastStores; MadeChange = true; + } else { + // Check if NI overwrites SI. + int64_t InstWriteOffset, DepWriteOffset; + auto Iter = State.IOLs.insert( + std::make_pair( + NI->getParent(), InstOverlapIntervalsTy())); + auto &IOL = Iter.first->second; + OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, + InstWriteOffset, NI, IOL, AA, &F); + + if (OR == OW_Complete) { + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI + << "\n KILLER: " << *SI << '\n'); + State.deleteDeadInstruction(NI); + ++NumFastStores; + MadeChange = true; + } } } } diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll @@ -1,4 +1,5 @@ ; XFAIL: * + ; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll @@ -1,5 +1,3 @@ -; XFAIL: * - ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-p:64:64:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Test that the getelementptr generated when the dse pass determines that ; a memset can be shortened has the debugloc carried over from the memset. diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll @@ -207,7 +207,7 @@ call void @capture(i8* %m) ret i8* %m } -; TODO: Remove store in exit. + ; Stores to stack objects can be eliminated if they are not captured inside the function. define void @test_alloca_nocapture_1() { ; CHECK-LABEL: @test_alloca_nocapture_1( @@ -228,7 +228,6 @@ ret void } -; TODO: Remove store in exit. ; Cannot remove first store i8 0, i8* %m, as the call to @capture captures the object. define void @test_alloca_capture_1() { ; CHECK-LABEL: @test_alloca_capture_1( @@ -250,7 +249,6 @@ ret void } -; TODO: Remove store at exit. ; We can remove the last store to %m, even though it escapes because the alloca ; becomes invalid after the function returns. define void @test_alloca_capture_2(%S1* %E) { diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -17,13 +17,12 @@ define void @test16(i32* noalias %P) { ; CHECK-LABEL: @test16( ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: call void @free(i8* [[P2]]) +; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: ret void ; %P2 = bitcast i32* %P to i8* @@ -34,6 +33,7 @@ br label %bb3 bb3: call void @free(i8* %P2) + store i32 1, i32* %P ret void } @@ -41,11 +41,9 @@ define void @test17(i32* noalias %P) { ; CHECK-LABEL: @test17( ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] ; CHECK: bb1: ; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: call void @free(i8* [[P2]]) @@ -63,6 +61,30 @@ ret void } +define void @test17_read_after_free(i32* noalias %P) { +; CHECK-LABEL: @test17_read_after_free( +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: call void @free(i8* [[P2]]) +; CHECK-NEXT: [[LV:%.*]] = load i8, i8* [[P2]] +; CHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + store i32 1, i32* %P + br i1 true, label %bb1, label %bb3 +bb1: + store i32 1, i32* %P + br label %bb3 +bb3: + call void @free(i8* %P2) + %lv = load i8, i8* %P2 + ret void +} + + define void @test6(i32* noalias %P) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -538,7 +538,6 @@ ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* ; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: store i32 2, i32* [[P]] ; CHECK-NEXT: call void @free(i8* [[P2]]) ; CHECK-NEXT: ret void ;