Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -692,6 +692,13 @@
   for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
     --BBI;
 
+    // We can remove the dead stores in the end basic block, irrespective of the
+    // fence and its ordering (release/acquire/seq_cst). We only need to check
+    // whether BBI satisfies the constraints, so that it can be removed. Also,
+    // the fences should never be removed.
+    if (isa<FenceInst>(&*BBI))
+      continue;
+
     // If we find a store, check to see if it points into a dead stack value.
     if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
       // See through pointer-to-pointer bitcasts
Index: test/Transforms/DeadStoreElimination/fence.ll
===================================================================
--- test/Transforms/DeadStoreElimination/fence.ll
+++ test/Transforms/DeadStoreElimination/fence.ll
@@ -46,3 +46,45 @@
   store i32 5, i32* %addr.i, align 4
   ret void
 }
+
+; We DSE stack alloc'ed pointer operands, byval arguments, and calloc-like operations at end blocks (blocks with no successors) irrespective of fence ordering.
+; The store to %addr.i can be removed since %addr.i is a byval argument.
+define void @test3(i32* byval %addr.i) {
+; CHECK-LABEL: @test3
+; CHECK-NOT: store
+; CHECK: fence
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence release
+  ret void
+}
+
+declare void @foo(i8* nocapture %p)
+
+declare noalias i8* @malloc(i32)
+define void @test_nocapture() {
+; CHECK-LABEL: @test_nocapture
+; CHECK: malloc
+; CHECK: foo
+; CHECK-NOT: store
+; CHECK: fence
+  %m = call i8* @malloc(i32 24)
+  call void @foo(i8* %m)
+  store i8 4, i8* %m
+  fence release
+  ret void
+}
+
+
+; This is a full fence, but since the stores are to a stack-allocated location, we can remove the stores (and leave the fence as-is).
+define void @fence_seq_cst(i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: ret void
+  %P1 = alloca i32
+  store i32 0, i32* %P1, align 4
+  fence seq_cst
+  store i32 4, i32* %P1, align 4
+  ret void
+}
+