Index: lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- lib/Analysis/MemoryDependenceAnalysis.cpp
+++ lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -370,6 +370,14 @@
   int64_t MemLocOffset = 0;
   unsigned Limit = BlockScanLimit;
   bool isInvariantLoad = false;
+
+  // Atomic accesses are only a problem if there is an acquire after a release.
+  // See "Compiler testing via a theory of sound optimisations in the C11/C++11
+  // memory model" in PLDI 2013 for details.
+  // This paper does not prove this result for RMW accesses/fences, so we are
+  // conservative with them.
+  bool HasSeenAcquire = false;
+
   if (isLoad && QueryInst) {
     LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
     if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
@@ -409,9 +417,9 @@
     // a load depends on another must aliased load from the same value.
     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
       // Atomic loads have complications involved.
-      // FIXME: This is overly conservative.
-      if (!LI->isUnordered())
-        return MemDepResult::getClobber(LI);
+      // Monotonic accesses are not dangerous for this purpose.
+      if (!LI->isUnordered() && LI->getOrdering() != Monotonic)
+        HasSeenAcquire = true;
 
       AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
 
@@ -469,8 +477,8 @@
 
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       // Atomic stores have complications involved.
-      // FIXME: This is overly conservative.
-      if (!SI->isUnordered())
+      // Monotonic accesses are safe for this purpose.
+      if (HasSeenAcquire && !SI->isUnordered() && SI->getOrdering() != Monotonic)
         return MemDepResult::getClobber(SI);
 
       // If alias analysis can tell that this store is guaranteed to not modify
@@ -495,6 +503,25 @@
       return MemDepResult::getClobber(Inst);
     }
 
+    // FIXME: We are probably too conservative here.
+    if (isa<FenceInst>(Inst)) {
+      return MemDepResult::getClobber(Inst);
+    }
+
+    // FIXME: We are probably too conservative here.
+    if (auto RMWI = dyn_cast<AtomicRMWInst>(Inst)) {
+      if (RMWI->getOrdering() != Monotonic) {
+        return MemDepResult::getClobber(Inst);
+      }
+    }
+
+    // FIXME: We are probably too conservative here.
+    if (auto CASI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+      if (CASI->getSuccessOrdering() != Monotonic) {
+        return MemDepResult::getClobber(Inst);
+      }
+    }
+
     // If this is an allocation, and if we know that the accessed pointer is to
     // the allocation, return Def. This means that there is no dependence and
     // the access can be optimized based on that. For example, a load could
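
(Reviewer aside, not part of the patch: the HasSeenAcquire rule can be sanity-checked against the test13 pattern added below. The following C++11 sketch is illustrative only; the thread structure and names are assumptions, with x and y mirroring the globals in the tests.)

// Why the first store to x in test13 must survive DSE: between the release
// store and the later acquire load, another thread can observe x == 0 without
// any data race. If "x = 0" were eliminated, the assert below could observe
// the initial value -1 instead.
#include <atomic>
#include <cassert>
#include <thread>

std::atomic<int> y(0);
int x = -1;

void writer() {
  x = 0;                                  // test13's "store i32 0, i32* @x"
  y.store(1, std::memory_order_release);  // release store to y
  while (y.load(std::memory_order_acquire) != 2) {
  }                                       // acquire load after the release
  x = 1;                                  // test13's "store i32 1, i32* @x"
}

void reader() {
  while (y.load(std::memory_order_acquire) != 1) {
  }                                       // synchronizes-with writer's release
  assert(x == 0);                         // race-free; must see the first store
  y.store(2, std::memory_order_release);  // unblock the writer
}

int main() {
  std::thread t1(writer);
  std::thread t2(reader);
  t1.join();
  t2.join();
  return 0;
}

With no acquire between the two stores, any other thread's read of x in that window would be a data race, so eliminating the first store is unobservable in race-free programs; that is the PLDI 2013 soundness argument cited in the code comment. Monotonic accesses never create synchronizes-with edges, which is why the check ignores them.
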
Index: test/Transforms/DeadStoreElimination/atomic.ll
===================================================================
--- test/Transforms/DeadStoreElimination/atomic.ll
+++ test/Transforms/DeadStoreElimination/atomic.ll
@@ -14,7 +14,7 @@
 
 ; DSE across unordered store (allowed)
 define void @test1() nounwind uwtable ssp {
-; CHECK: test1
+; CHECK-LABEL: test1
 ; CHECK-NOT: store i32 0
 ; CHECK: store i32 1
 entry:
@@ -24,10 +24,10 @@
   ret void
 }
 
-; DSE across seq_cst load (allowed in theory; not implemented ATM)
+; DSE across seq_cst load
 define i32 @test2() nounwind uwtable ssp {
-; CHECK: test2
-; CHECK: store i32 0
+; CHECK-LABEL: test2
+; CHECK-NOT: store i32 0
 ; CHECK: store i32 1
 entry:
   store i32 0, i32* @x
   %x = load atomic i32* @x seq_cst, align 4
   ret i32 %x
 }
 
-; DSE across seq_cst store (store before atomic store must not be removed)
+; DSE across seq_cst store
 define void @test3() nounwind uwtable ssp {
-; CHECK: test3
-; CHECK: store i32
+; CHECK-LABEL: test3
+; CHECK-NOT: store i32
 ; CHECK: store atomic i32 2
 entry:
   store i32 0, i32* @x
   store atomic i32 2, i32* @x seq_cst, align 4
   ret void
 }
@@ -50,7 +50,7 @@
 
 ; DSE remove unordered store (allowed)
 define void @test4() nounwind uwtable ssp {
-; CHECK: test4
+; CHECK-LABEL: test4
 ; CHECK-NOT: store atomic
 ; CHECK: store i32 1
 entry:
@@ -61,7 +61,8 @@
 
 ; DSE unordered store overwriting non-atomic store (allowed)
 define void @test5() nounwind uwtable ssp {
-; CHECK: test5
+; CHECK-LABEL: test5
+; CHECK-NOT: store
 ; CHECK: store atomic i32 1
 entry:
   store i32 0, i32* @x
@@ -71,7 +72,7 @@
 
 ; DSE no-op unordered atomic store (allowed)
 define void @test6() nounwind uwtable ssp {
-; CHECK: test6
+; CHECK-LABEL: test6
 ; CHECK-NOT: store
 ; CHECK: ret void
 entry:
@@ -83,8 +84,8 @@
 ; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
 ; to reason about atomic operations).
 define void @test7() nounwind uwtable ssp {
-; CHECK: test7
+; CHECK-LABEL: test7
 ; CHECK: store atomic
 entry:
   %a = alloca i32
   store atomic i32 0, i32* %a seq_cst, align 4
@@ -94,9 +95,9 @@
 ; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure
 ; to reason about atomic operations).
 define i32 @test8() nounwind uwtable ssp {
-; CHECK: test8
+; CHECK-LABEL: test8
 ; CHECK: store
 ; CHECK: load atomic
 entry:
   %a = alloca i32
   call void @randomop(i32* %a)
@@ -105,3 +106,108 @@
   ret i32 %x
 }
 
+; DSE should never eliminate an atomic write based only on a previous read
+; if the write is monotonic or stronger.
+define i32 @test9() nounwind uwtable ssp {
+; CHECK-LABEL: test9
+; CHECK: store atomic
+entry:
+  %x = load atomic i32* @x monotonic, align 4
+  store atomic i32 %x, i32* @x monotonic, align 4
+  ret i32 %x
+}
+
+; DSE is allowed to operate across monotonic accesses.
+define i32 @test10() nounwind uwtable ssp {
+; CHECK-LABEL: test10
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+entry:
+  store i32 0, i32* @x
+  %x = load atomic i32* @y monotonic, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; DSE is even allowed across an atomic read followed by an atomic write.
+define i32 @test11() nounwind uwtable ssp {
+; CHECK-LABEL: test11
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+entry:
+  store i32 0, i32* @x
+  %x = load atomic i32* @y seq_cst, align 4
+  store atomic i32 %x, i32* @y seq_cst, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; Atomic operations outside of the pair of considered accesses should be
+; ignored.
+define i32 @test12() nounwind uwtable ssp {
+; CHECK-LABEL: test12
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+entry:
+  store atomic i32 0, i32* @y seq_cst, align 4
+  store i32 0, i32* @x
+  %x = load atomic i32* @y seq_cst, align 4
+  store atomic i32 %x, i32* @y seq_cst, align 4
+  store i32 1, i32* @x
+  %y = load atomic i32* @y seq_cst, align 4
+  ret i32 %x
+}
+
+; But DSE is not allowed across a release-acquire pair.
+define i32 @test13() nounwind uwtable ssp {
+; CHECK-LABEL: test13
+; CHECK: store i32 0
+; CHECK: store i32 1
+entry:
+  store i32 0, i32* @x
+  store atomic i32 0, i32* @y release, align 4
+  %x = load atomic i32* @y acquire, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; And we currently treat fences conservatively.
+define void @test14() nounwind uwtable ssp {
+; CHECK-LABEL: test14
+; CHECK: store i32 0
+; CHECK: store i32 1
+entry:
+  store i32 0, i32* @x
+  fence seq_cst
+  store i32 1, i32* @x
+  ret void
+}
+
+; We also treat RMW and CAS conservatively if they are not monotonic.
+define void @test15() nounwind uwtable ssp {
+; CHECK-LABEL: test15
+; CHECK: store i32 0
+; CHECK: store i32 1
+; CHECK: store i32 2
+entry:
+  store i32 0, i32* @x
+  %b = atomicrmw add i32* @y, i32 1 release
+  store i32 1, i32* @x
+  cmpxchg i32* @y, i32 42, i32 1 acquire monotonic
+  store i32 2, i32* @x
+  ret void
+}
+
+; But we still allow DSE across them if they are monotonic.
+define void @test16() nounwind uwtable ssp {
+; CHECK-LABEL: test16
+; CHECK-NOT: store i32 0
+; CHECK-NOT: store i32 1
+; CHECK: store i32 2
+entry:
+  store i32 0, i32* @x
+  %b = atomicrmw add i32* @y, i32 1 monotonic
+  store i32 1, i32* @x
+  cmpxchg i32* @y, i32 42, i32 1 monotonic monotonic
+  store i32 2, i32* @x
+  ret void
+}
Index: test/Transforms/GVN/atomic.ll
===================================================================
--- test/Transforms/GVN/atomic.ll
+++ test/Transforms/GVN/atomic.ll
@@ -18,10 +18,10 @@
   ret i32 %z
 }
 
-; GVN across seq_cst store (allowed in theory; not implemented ATM)
+; GVN across seq_cst store
 define i32 @test2() nounwind uwtable ssp {
 ; CHECK: test2
-; CHECK: add i32 %x, %y
+; CHECK: add i32 %x, %x
 entry:
   %x = load i32* @y
   store atomic i32 %x, i32* @x seq_cst, align 4
@@ -43,11 +43,11 @@
   ret i32 %b
 }
 
-; GVN across acquire load (load after atomic load must not be removed)
+; GVN across acquire load
 define i32 @test4() nounwind uwtable ssp {
 ; CHECK: test4
 ; CHECK: load atomic i32* @x
-; CHECK: load i32* @y
+; CHECK-NOT: load i32* @y
 entry:
   %x = load i32* @y
   %y = load atomic i32* @x seq_cst, align 4
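
(Also an aside, not part of the patch: assuming a configured build tree, the updated tests can be driven through their own RUN lines with lit, e.g. bin/llvm-lit test/Transforms/DeadStoreElimination/atomic.ll test/Transforms/GVN/atomic.ll.)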