Index: lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- lib/Analysis/MemoryDependenceAnalysis.cpp
+++ lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -617,7 +617,16 @@
     }
 
     if (isInvariantLoad)
-       continue;
+      continue;
+
+    // A release fence requires that all stores complete before it, but does
+    // not prevent the reordering of following loads or stores 'before' the
+    // fence. As a result, we look past it when finding a dependency for
+    // loads. DSE uses this to find preceding stores and thus we can't bypass
+    // the fence if the query inst is a store.
+    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+      if (isLoad && FI->getOrdering() == Release)
+        continue;
 
     // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
     AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
Index: test/Transforms/DeadStoreElimination/fence.ll
===================================================================
--- /dev/null
+++ test/Transforms/DeadStoreElimination/fence.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+; We can NOT dead store eliminate across the fence
+define void @test1(i32* %addr.i) {
+; CHECK-LABEL: @test1
+; CHECK: store
+; CHECK: fence
+; CHECK: store
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence release
+  store i32 5, i32* %addr.i, align 4
+  ret void
+}
+
+; We *could* DSE across this fence, but don't. No other thread can
+; observe the order of the acquire fence and the store.
+define void @test2(i32* %addr.i) {
+; CHECK-LABEL: @test2
+; CHECK: store
+; CHECK: fence
+; CHECK: store
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence acquire
+  store i32 5, i32* %addr.i, align 4
+  ret void
+}
Index: test/Transforms/GVN/fence.ll
===================================================================
--- /dev/null
+++ test/Transforms/GVN/fence.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+
+; We can value forward across the fence since we can (semantically)
+; reorder the following load before the fence.
+define i32 @test(i32* %addr.i) {
+; CHECK-LABEL: @test
+; CHECK: store
+; CHECK: fence
+; CHECK-NOT: load
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence release
+  %a = load i32, i32* %addr.i, align 4
+  ret i32 %a
+}
+
+; Same as above
+define i32 @test2(i32* %addr.i) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: fence
+; CHECK-NOT: load
+; CHECK: ret
+  %a = load i32, i32* %addr.i, align 4
+  fence release
+  %a2 = load i32, i32* %addr.i, align 4
+  %res = sub i32 %a, %a2
+  ret i32 %res
+}
+
+; We can not value forward across an acquire barrier since we might
+; be synchronizing with another thread storing to the same variable
+; followed by a release fence. If this thread observed the release
+; had happened, we must present a consistent view of memory at the fence.
+define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) {
+; CHECK-LABEL: @test3
+; CHECK: load
+; CHECK: fence
+; CHECK: load
+; CHECK: ret
+  %a = load i32, i32* %addr.i, align 4
+  fence acquire
+  %a2 = load i32, i32* %addr.i, align 4
+  %res = sub i32 %a, %a2
+  ret i32 %res
+}