diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1563,7 +1563,7 @@ } } - enum class WalkType { Bad, Next, Terminated }; + enum class WalkType { Bad, Next, Terminated, End }; struct WalkResult { WalkType Type; MemoryDef *Next; @@ -1783,7 +1783,7 @@ } // No aliasing definition found, we reached the end of the walk. - return {WalkType::Bad, nullptr}; + return {WalkType::End, nullptr}; } /// For all memory terminators in \p MemTerminators, traverse the users and @@ -1840,6 +1840,25 @@ } return Changed; } + + bool handleEnd(MemoryDef *Def) { + const DataLayout &DL = F.getParent()->getDataLayout(); + Instruction *DefI = Def->getMemoryInst(); + // See through pointer-to-pointer bitcasts + SmallVector<const Value *, 4> Pointers; + GetUnderlyingObjects(getLocForWriteEx(DefI)->Ptr, Pointers, DL); + for (const Value *Pointer : Pointers) { + if (!InvisibleToCaller.count(Pointer)) + return false; + if ((isAllocLikeFn(Pointer, &TLI) && + PointerMayBeCaptured(Pointer, true, true))) + return false; + } + + deleteDeadInstruction(DefI); + ++NumFastStores; + return true; + } }; bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, @@ -1867,11 +1886,21 @@ LLVM_DEBUG(dbgs() << "Trying to eliminate " << *SI << "\n"); + MemoryDef *SIDef = SIMD; // Walk MemorySSA forward to find a MemoryDef that kills SI. DSEState::WalkResult Next; while ((Next = State.getNextMemoryDef( SIMD, SILoc, DefObj, LocVisibleToCaller, MemorySSAScanLimit))) { - + // We have hit the end of the walk. If the store is non-escaping, it is + // dead and we can kill it. 
+ if (Next.Type == DSEState::WalkType::End) { + if (!DebugCounter::shouldExecute(MemorySSACounter)) + break; + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store at end:\n DEAD: " + << *SIDef->getMemoryInst() << "\n"); + MadeChange |= State.handleEnd(SIDef); + break; + } if (Next.Type == DSEState::WalkType::Terminated) { Instruction *TermI = cast<MemoryDef>(Next.Next)->getMemoryInst(); if (State.mayThrowBetween(SI, TermI, SILocUnd)) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll @@ -1,4 +1,3 @@ -; XFAIL: * ; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll deleted file mode 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll +++ /dev/null @@ -1,50 +0,0 @@ -; XFAIL: * - -; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s - -; We DSE stack alloc'ed and byval locations, in the presence of fences. -; Fence does not make an otherwise thread local store visible. -; Right now the DSE in presence of fence is only done in end blocks (with no successors), -; but the same logic applies to other basic blocks as well. 
-; The store to %addr.i can be removed since it is a byval attribute -define void @test3(i32* byval %addr.i) { -; CHECK-LABEL: @test3 -; CHECK-NOT: store -; CHECK: fence -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence release - ret void -} - -declare void @foo(i8* nocapture %p) - -declare noalias i8* @malloc(i32) - -; DSE of stores in locations allocated through library calls. -define void @test_nocapture() { -; CHECK-LABEL: @test_nocapture -; CHECK: malloc -; CHECK: foo -; CHECK-NOT: store -; CHECK: fence - %m = call i8* @malloc(i32 24) - call void @foo(i8* %m) - store i8 4, i8* %m - fence release - ret void -} - - -; This is a full fence, but it does not make a thread local store visible. -; We can DSE the store in presence of the fence. -define void @fence_seq_cst() { -; CHECK-LABEL: @fence_seq_cst -; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: ret void - %P1 = alloca i32 - store i32 0, i32* %P1, align 4 - fence seq_cst - store i32 4, i32* %P1, align 4 - ret void -} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll @@ -46,3 +46,50 @@ store i32 5, i32* %addr.i, align 4 ret void } + +; We DSE stack alloc'ed and byval locations, in the presence of fences. +; Fence does not make an otherwise thread local store visible. +; Right now the DSE in presence of fence is only done in end blocks (with no successors), +; but the same logic applies to other basic blocks as well. +; The store to %addr.i can be removed since it is a byval attribute +define void @test3(i32* byval %addr.i) { +; CHECK-LABEL: @test3 +; CHECK-NOT: store +; CHECK: fence +; CHECK: ret + store i32 5, i32* %addr.i, align 4 + fence release + ret void +} + +declare void @foo(i8* nocapture %p) + +declare noalias i8* @malloc(i32) + +; DSE of stores in locations allocated through library calls. 
+define void @test_nocapture() { +; CHECK-LABEL: @test_nocapture +; CHECK: malloc +; CHECK: foo +; CHECK-NOT: store +; CHECK: fence + %m = call i8* @malloc(i32 24) + call void @foo(i8* %m) + store i8 4, i8* %m + fence release + ret void +} + + +; This is a full fence, but it does not make a thread local store visible. +; We can DSE the store in presence of the fence. +define void @fence_seq_cst() { +; CHECK-LABEL: @fence_seq_cst +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: ret void + %P1 = alloca i32 + store i32 0, i32* %P1, align 4 + fence seq_cst + store i32 4, i32* %P1, align 4 + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -dse -enable-dse-memoryssa -memdep-block-scan-limit=3 < %s | FileCheck %s -; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa -memdep-block-scan-limit=3 < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s +; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s ; Test case to check that the memory dependency analysis gets the same ; result even if we have a dbg value between the memcpy and diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll @@ -207,15 +207,13 @@ call void @capture(i8* %m) ret i8* %m } -; TODO: Remove store in exit. + ; Stores to stack objects can be eliminated if they are not captured inside the function. 
define void @test_alloca_nocapture_1() { ; CHECK-LABEL: @test_alloca_nocapture_1( -; CHECK-NEXT: [[M:%.*]] = alloca i8 ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -228,7 +226,6 @@ ret void } -; TODO: Remove store in exit. ; Cannot remove first store i8 0, i8* %m, as the call to @capture captures the object. define void @test_alloca_capture_1() { ; CHECK-LABEL: @test_alloca_capture_1( @@ -237,7 +234,6 @@ ; CHECK-NEXT: call void @capture(i8* [[M]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -250,7 +246,6 @@ ret void } -; TODO: Remove store at exit. ; We can remove the last store to %m, even though it escapes because the alloca ; becomes invalid after the function returns. define void @test_alloca_capture_2(%S1* %E) { @@ -260,7 +255,6 @@ ; CHECK: exit: ; CHECK-NEXT: [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0 ; CHECK-NEXT: store i8* [[M]], i8** [[F_PTR]] -; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll @@ -29,7 +29,6 @@ ; CHECK-NEXT: [[C1:%.*]] = cleanuppad within none [] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: store i32 40, i32* [[SV]] ; CHECK-NEXT: ret void ; block1: