Index: lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- lib/Transforms/Scalar/DeadStoreElimination.cpp +++ lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -771,6 +771,14 @@ continue; } + // We can remove the dead stores in the end basic block, irrespective of the + // fence and its ordering (release/acquire/seq_cst). A fence only constrains + // the ordering of already visible stores; it does not make a store visible + // to other threads. So, skipping over a fence does not change a store from + // being dead. + if (isa<FenceInst>(&*BBI)) + continue; + MemoryLocation LoadedLoc; // If we encounter a use of the pointer, it is no longer considered dead Index: test/Transforms/DeadStoreElimination/fence.ll =================================================================== --- test/Transforms/DeadStoreElimination/fence.ll +++ test/Transforms/DeadStoreElimination/fence.ll @@ -46,3 +46,45 @@ store i32 5, i32* %addr.i, align 4 ret void } + +; We DSE stores to stack alloc'ed pointer operands, byval arguments, and calloc-like allocations at end blocks (blocks that contain no successors) irrespective of fence ordering. +; The store to %addr.i can be removed since %addr.i is a byval argument +define void @test3(i32* byval %addr.i) { +; CHECK-LABEL: @test3 +; CHECK-NOT: store +; CHECK: fence +; CHECK: ret + store i32 5, i32* %addr.i, align 4 + fence release + ret void +} + +declare void @foo(i8* nocapture %p) + +declare noalias i8* @malloc(i32) +define void @test_nocapture() { +; CHECK-LABEL: @test_nocapture +; CHECK: malloc +; CHECK: foo +; CHECK-NOT: store +; CHECK: fence + %m = call i8* @malloc(i32 24) + call void @foo(i8* %m) + store i8 4, i8* %m + fence release + ret void +} + + +; This is a full fence, but since the stores are to a stack-allocated location, we can remove them (and leave the fence as-is). 
+define void @fence_seq_cst(i32* %P2) { +; CHECK-LABEL: @fence_seq_cst +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: ret void + %P1 = alloca i32 + store i32 0, i32* %P1, align 4 + fence seq_cst + store i32 4, i32* %P1, align 4 + ret void +} +