Index: llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp +++ llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp @@ -411,15 +411,6 @@ if (IsTargetMemInst) return Info.WriteMem; return isa<StoreInst>(Inst); } - bool isSimple() const { - if (IsTargetMemInst) return Info.IsSimple; - if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - return LI->isSimple(); - } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - return SI->isSimple(); - } - return Inst->isAtomic(); - } bool isAtomic() const { if (IsTargetMemInst) { assert(Info.IsSimple && "need to refine IsSimple in TTI"); @@ -722,12 +713,17 @@ if (MemInst.isValid() && MemInst.isStore()) { // We do a trivial form of DSE if there are two stores to the same - // location with no intervening loads. Delete the earlier store. Note - // that we can delete an earlier simple store even if the following one - // is ordered/volatile/atomic store. + // location with no intervening loads. Delete the earlier store. + // At the moment, we don't remove ordered stores, but do remove + // unordered atomic stores. There's no special requirement (for + // unordered atomics) about removing atomic stores only in favor of + // other atomic stores since we're going to execute the non-atomic + // one anyway and the atomic one might never have become visible. if (LastStore) { ParseMemoryInst LastStoreMemInst(LastStore, TTI); - assert(LastStoreMemInst.isSimple() && "Violated invariant"); + assert(LastStoreMemInst.isUnordered() && + !LastStoreMemInst.isVolatile() && + "Violated invariant"); if (LastStoreMemInst.isMatchingMemLoc(MemInst)) { DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: " << *Inst << '\n'); @@ -749,11 +745,14 @@ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(), MemInst.isAtomic())); - // Remember that this was the last normal store we saw for DSE. 
- // Note that we can't delete an earlier atomic or volatile store in - // favor of a later one which isn't. We could in principle remove an - // earlier unordered store if the later one is also unordered. - if (MemInst.isSimple()) + // Remember that this was the last unordered store we saw for DSE. We + // don't yet handle DSE on ordered or volatile stores since we don't + // have a good way to model the ordering requirement for following + // passes once the store is removed. We could insert a fence, but + // since fences are slightly stronger than stores in their ordering, + // it's not clear this is a profitable transform. Another option would + // be to merge the ordering with that of the post dominating store. + if (MemInst.isUnordered() && !MemInst.isVolatile()) LastStore = Inst; else LastStore = nullptr; Index: llvm/trunk/test/Transforms/EarlyCSE/atomics.ll =================================================================== --- llvm/trunk/test/Transforms/EarlyCSE/atomics.ll +++ llvm/trunk/test/Transforms/EarlyCSE/atomics.ll @@ -181,5 +181,79 @@ ret void } +; Can also DSE an unordered store in favor of a normal one +define void @test23(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test23 +; CHECK-NEXT: store i32 0 + store atomic i32 3, i32* %P1 unordered, align 4 + store i32 0, i32* %P1, align 4 + ret void +} + +; As an implementation limitation, can't remove ordered stores +; Note that we could remove the earlier store if we could +; represent the required ordering. +define void @test24(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test24 +; CHECK-NEXT: store atomic +; CHECK-NEXT: store i32 0 + store atomic i32 3, i32* %P1 release, align 4 + store i32 0, i32* %P1, align 4 + ret void +} +; Can't remove volatile stores - each is independently observable and +; the count of such stores is an observable program side effect. 
+define void @test25(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test25 +; CHECK-NEXT: store volatile +; CHECK-NEXT: store volatile + store volatile i32 3, i32* %P1, align 4 + store volatile i32 0, i32* %P1, align 4 + ret void +} +; Can DSE an unordered store in favor of an unordered one +define void @test26(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test26 +; CHECK-NEXT: store atomic i32 3, i32* %P1 unordered, align 4 +; CHECK-NEXT: ret + store atomic i32 0, i32* %P1 unordered, align 4 + store atomic i32 3, i32* %P1 unordered, align 4 + ret void +} + +; Can DSE an unordered store in favor of an ordered one, +; but currently don't due to implementation limits +define void @test27(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test27 +; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4 +; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4 +; CHECK-NEXT: ret + store atomic i32 0, i32* %P1 unordered, align 4 + store atomic i32 3, i32* %P1 release, align 4 + ret void +} + +; Can DSE an unordered atomic store in favor of an +; ordered one, but currently don't due to implementation limits +define void @test28(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test28 +; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4 +; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4 +; CHECK-NEXT: ret + store atomic i32 0, i32* %P1 unordered, align 4 + store atomic i32 3, i32* %P1 release, align 4 + ret void +} + +; As an implementation limitation, can't remove ordered stores +; see also: @test24 +define void @test29(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test29 +; CHECK-NEXT: store atomic +; CHECK-NEXT: store atomic + store atomic i32 3, i32* %P1 release, align 4 + store atomic i32 0, i32* %P1 unordered, align 4 + ret void +}