diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -51,6 +51,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -73,6 +74,7 @@ #include using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "dse" @@ -1533,7 +1535,7 @@ auto *MD = dyn_cast_or_null(MA); if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit && - State.getLocForWriteEx(&I)) + (State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I))) State.MemDefs.push_back(MD); // Track whether alloca and alloca-like objects are visible in the @@ -1667,6 +1669,51 @@ return true; } + /// If \p I is a memory terminator like llvm.lifetime.end or free, return a + /// pair with the MemoryLocation terminated by \p I and a boolean flag + /// indicating whether \p I is a free-like call. + Optional> + getLocForTerminator(Instruction *I) const { + uint64_t Len; + Value *Ptr; + if (match(I, m_Intrinsic(m_ConstantInt(Len), + m_Value(Ptr)))) + return {std::make_pair(MemoryLocation(Ptr, Len), false)}; + + if (auto *CB = dyn_cast(I)) { + if (isFreeCall(I, &TLI)) + return {std::make_pair(MemoryLocation(CB->getArgOperand(0)), true)}; + } + + return None; + } + + /// Returns true if \p I is a memory terminator instruction like + /// llvm.lifetime.end or free. + bool isMemTerminatorInst(Instruction *I) const { + IntrinsicInst *II = dyn_cast(I); + return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) || + isFreeCall(I, &TLI); + } + + /// Returns true if \p MaybeTerm is a memory terminator for the same + /// underlying object as \p DefLoc. + bool isMemTerminator(MemoryLocation DefLoc, Instruction *MaybeTerm) const { + Optional> MaybeTermLoc = + getLocForTerminator(MaybeTerm); + + if (!MaybeTermLoc) + return false; + + // If the terminator is a free-like call, all accesses to the underlying + // object can be considered terminated. + if (MaybeTermLoc->second) { + DataLayout DL = MaybeTerm->getParent()->getModule()->getDataLayout(); + DefLoc = MemoryLocation(GetUnderlyingObject(DefLoc.Ptr, DL)); + } + return AA.isMustAlias(MaybeTermLoc->first, DefLoc); + } + // Returns true if \p Use may read from \p DefLoc. bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const { if (!UseInst->mayReadFromMemory()) @@ -1772,6 +1819,11 @@ continue; } + // A memory terminator kills all preceeding MemoryDefs and all succeeding + // MemoryAccesses. We do not have to check it's users. + if (isMemTerminator(DefLoc, UseInst)) + continue; + // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. if (isReadClobber(DefLoc, UseInst)) { @@ -2059,6 +2111,12 @@ Instruction *SI = KillingDef->getMemoryInst(); auto MaybeSILoc = State.getLocForWriteEx(SI); + if (State.isMemTerminatorInst(SI)) + MaybeSILoc = State.getLocForTerminator(SI).map( + [](const std::pair &P) { return P.first; }); + else + MaybeSILoc = State.getLocForWriteEx(SI); + if (!MaybeSILoc) { LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " << *SI << "\n"); @@ -2165,43 +2223,55 @@ continue; MemoryLocation NILoc = *State.getLocForWriteEx(NI); - // Check if NI overwrites SI. - int64_t InstWriteOffset, DepWriteOffset; - auto Iter = State.IOLs.insert( - std::make_pair( - NI->getParent(), InstOverlapIntervalsTy())); - auto &IOL = Iter.first->second; - OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, - InstWriteOffset, NI, IOL, AA, &F); - - if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) { - auto *Earlier = dyn_cast(NI); - auto *Later = dyn_cast(SI); - if (Constant *Merged = tryToMergePartialOverlappingStores( - Earlier, Later, InstWriteOffset, DepWriteOffset, DL, &AA, - &DT)) { - - // Update stored value of earlier store to merged constant. - Earlier->setOperand(0, Merged); - ++NumModifiedStores; - MadeChange = true; - - // Remove later store and remove any outstanding overlap intervals for - // the updated store. - State.deleteDeadInstruction(Later); - auto I = State.IOLs.find(Earlier->getParent()); - if (I != State.IOLs.end()) - I->second.erase(Earlier); - break; - } - } - if (OR == OW_Complete) { + if (State.isMemTerminatorInst(SI)) { + const Value *NIUnd = GetUnderlyingObject(NILoc.Ptr, DL); + if (!SILocUnd || SILocUnd != NIUnd) + continue; LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI << "\n KILLER: " << *SI << '\n'); State.deleteDeadInstruction(NI); ++NumFastStores; MadeChange = true; + } else { + // Check if NI overwrites SI. + int64_t InstWriteOffset, DepWriteOffset; + auto Iter = State.IOLs.insert( + std::make_pair( + NI->getParent(), InstOverlapIntervalsTy())); + auto &IOL = Iter.first->second; + OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, + InstWriteOffset, NI, IOL, AA, &F); + + if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) { + auto *Earlier = dyn_cast(NI); + auto *Later = dyn_cast(SI); + if (Constant *Merged = tryToMergePartialOverlappingStores( + Earlier, Later, InstWriteOffset, DepWriteOffset, DL, &AA, + &DT)) { + + // Update stored value of earlier store to merged constant. + Earlier->setOperand(0, Merged); + ++NumModifiedStores; + MadeChange = true; + + // Remove later store and remove any outstanding overlap intervals + // for the updated store. + State.deleteDeadInstruction(Later); + auto I = State.IOLs.find(Earlier->getParent()); + if (I != State.IOLs.end()) + I->second.erase(Earlier); + break; + } + } + + if (OR == OW_Complete) { + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI + << "\n KILLER: " << *SI << '\n'); + State.deleteDeadInstruction(NI); + ++NumFastStores; + MadeChange = true; + } } } } diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2016-07-17-UseAfterFree.ll @@ -1,5 +1,4 @@ -; XFAIL: * -; RUN: opt < %s -basic-aa -dse-enable-dse-memoryssa -S -enable-dse-partial-overwrite-tracking | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S -enable-dse-partial-overwrite-tracking | FileCheck %s ; PR28588 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll @@ -1,5 +1,3 @@ -; XFAIL: * - ; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-p:64:64:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll @@ -1,5 +1,3 @@ -; XFAIL: * - ; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa < %s | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @@ -35,5 +33,3 @@ ; CHECK: lifetime.end ret void } - - diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Test that the getelementptr generated when the dse pass determines that ; a memset can be shortened has the debugloc carried over from the memset. diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll @@ -207,6 +207,7 @@ call void @capture(i8* %m) ret i8* %m } + ; Stores to stack objects can be eliminated if they are not captured inside the function. define void @test_alloca_nocapture_1() { ; CHECK-LABEL: @test_alloca_nocapture_1( diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -17,13 +17,12 @@ define void @test16(i32* noalias %P) { ; CHECK-LABEL: @test16( ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: call void @free(i8* [[P2]]) +; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: ret void ; %P2 = bitcast i32* %P to i8* @@ -34,6 +33,7 @@ br label %bb3 bb3: call void @free(i8* %P2) + store i32 1, i32* %P ret void } @@ -41,11 +41,9 @@ define void @test17(i32* noalias %P) { ; CHECK-LABEL: @test17( ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] ; CHECK: bb1: ; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: call void @free(i8* [[P2]]) @@ -63,6 +61,30 @@ ret void } +define void @test17_read_after_free(i32* noalias %P) { +; CHECK-LABEL: @test17_read_after_free( +; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: call void @free(i8* [[P2]]) +; CHECK-NEXT: [[LV:%.*]] = load i8, i8* [[P2]] +; CHECK-NEXT: ret void +; + %P2 = bitcast i32* %P to i8* + store i32 1, i32* %P + br i1 true, label %bb1, label %bb3 +bb1: + store i32 1, i32* %P + br label %bb3 +bb3: + call void @free(i8* %P2) + %lv = load i8, i8* %P2 + ret void +} + + define void @test6(i32* noalias %P) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -625,7 +625,6 @@ ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* ; CHECK-NEXT: store i32 1, i32* [[P]], align 4 ; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: store i32 2, i32* [[P]], align 4 ; CHECK-NEXT: call void @free(i8* [[P2]]) ; CHECK-NEXT: ret void ;