Index: lib/Transforms/Scalar/EarlyCSE.cpp =================================================================== --- lib/Transforms/Scalar/EarlyCSE.cpp +++ lib/Transforms/Scalar/EarlyCSE.cpp @@ -613,6 +613,17 @@ continue; } + // A release fence requires that all stores complete before it, but does + // not prevent the reordering of following loads 'before' the fence. As a + // result, we don't need to consider it as writing to memory and don't need + // to advance the generation. We do need to prevent DSE across the fence, + // but that's handled above. + if (FenceInst *FI = dyn_cast(Inst)) + if (FI->getOrdering() == Release) { + assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above"); + continue; + } + // Okay, this isn't something we can CSE at all. Check to see if it is // something that could modify memory. If so, our available memory values // cannot be used so bump the generation count. Index: test/Transforms/EarlyCSE/fence.ll =================================================================== --- /dev/null +++ test/Transforms/EarlyCSE/fence.ll @@ -0,0 +1,73 @@ +; RUN: opt -S -early-cse < %s | FileCheck %s + +; We can value forward across the fence since we can (semantically) +; reorder the following load before the fence. +define i32 @test(i32* %addr.i) { +; CHECK-LABEL: @test +; CHECK: store +; CHECK: fence +; CHECK-NOT: load +; CHECK: ret + store i32 5, i32* %addr.i, align 4 + fence release + %a = load i32, i32* %addr.i, align 4 + ret i32 %a +} + +; Same as above +define i32 @test2(i32* noalias %addr.i, i32* noalias %otheraddr) { +; CHECK-LABEL: @test2 +; CHECK: load +; CHECK: fence +; CHECK-NOT: load +; CHECK: ret + %a = load i32, i32* %addr.i, align 4 + fence release + %a2 = load i32, i32* %addr.i, align 4 + %res = sub i32 %a, %a2 + ret i32 %a +} + +; We can not value forward across an acquire barrier since we might +; be syncronizing with another thread storing to the same variable +; followed by a release fence. If this thread observed the release +; had happened, we must present a consistent view of memory at the fence. +define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) { +; CHECK-LABEL: @test3 +; CHECK: load +; CHECK: fence +; CHECK: load +; CHECK: ret + %a = load i32, i32* %addr.i, align 4 + fence acquire + %a2 = load i32, i32* %addr.i, align 4 + %res = sub i32 %a, %a2 + ret i32 %a +} + +; We can NOT dead store eliminate accross the fence +define void @test4(i32* %addr.i) { +; CHECK-LABEL: @test4 +; CHECK: store +; CHECK: fence +; CHECK: store +; CHECK: ret + store i32 5, i32* %addr.i, align 4 + fence release + store i32 5, i32* %addr.i, align 4 + ret void +} + +; We *could* DSE across this fence, but don't. No other thread can +; observe the order of the acquire fence and the store. +define void @test5(i32* %addr.i) { +; CHECK-LABEL: @test5 +; CHECK: store +; CHECK: fence +; CHECK: store +; CHECK: ret + store i32 5, i32* %addr.i, align 4 + fence acquire + store i32 5, i32* %addr.i, align 4 + ret void +}