Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1038,9 +1038,9 @@
     return &SI;
   }
 
-  // Don't hack volatile/atomic stores.
-  // FIXME: Some bits are legal for atomic stores; needs refactoring.
-  if (!SI.isSimple()) return nullptr;
+  // Don't hack volatile/ordered stores.
+  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
+  if (!SI.isUnordered()) return nullptr;
 
   // If the RHS is an alloca with a single use, zapify the store, making the
   // alloca dead.
@@ -1072,7 +1072,7 @@
 
     if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
       // Prev store isn't volatile, and stores to the same location?
-      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
+      if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
                                                         SI.getOperand(1))) {
         ++NumDeadStore;
         ++BBI;
@@ -1086,9 +1086,10 @@
     // the pointer we're loading and is producing the pointer we're storing,
     // then *this* store is dead (X = load P; store X -> P).
     if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
-      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
-          LI->isSimple())
+      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
+        assert(SI.isUnordered() && "can't eliminate ordering operation");
         return EraseInstFromFunction(SI);
+      }
 
       // Otherwise, this is a load from some other location. Stores before it
       // may not be dead.
@@ -1114,6 +1115,10 @@
   if (isa<UndefValue>(Val))
     return EraseInstFromFunction(SI);
 
+  // The code below needs to be audited and adjusted for unordered atomics
+  if (!SI.isSimple())
+    return nullptr;
+
   // If this store is the last instruction in the basic block (possibly
   // excepting debug info instructions), and if the block ends with an
   // unconditional branch, try to move it to the successor block.
Index: llvm/trunk/test/Transforms/InstCombine/store.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/store.ll
+++ llvm/trunk/test/Transforms/InstCombine/store.ll
@@ -113,6 +113,119 @@
 ; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
 }
 
+define void @dse1(i32* %p) {
+; CHECK-LABEL: dse1
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 0, i32* %p
+  store i32 0, i32* %p
+  ret void
+}
+
+; Slightly subtle: if we're mixing atomic and non-atomic access to the
+; same location, then the contents of the location are undefined if there's
+; an actual race. As such, we're free to pick either store under the
+; assumption that we're not racing with any other thread.
+define void @dse2(i32* %p) {
+; CHECK-LABEL: dse2
+; CHECK-NEXT: store i32 0, i32* %p
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %p unordered, align 4
+  store i32 0, i32* %p
+  ret void
+}
+
+define void @dse3(i32* %p) {
+; CHECK-LABEL: dse3
+; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4
+; CHECK-NEXT: ret
+  store i32 0, i32* %p
+  store atomic i32 0, i32* %p unordered, align 4
+  ret void
+}
+
+define void @dse4(i32* %p) {
+; CHECK-LABEL: dse4
+; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %p unordered, align 4
+  store atomic i32 0, i32* %p unordered, align 4
+  ret void
+}
+
+; Implementation limit - could remove unordered store here, but
+; currently don't.
+define void @dse5(i32* %p) {
+; CHECK-LABEL: dse5
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %p unordered, align 4
+  store atomic i32 0, i32* %p seq_cst, align 4
+  ret void
+}
+
+define void @write_back1(i32* %p) {
+; CHECK-LABEL: write_back1
+; CHECK-NEXT: ret
+  %v = load i32, i32* %p
+  store i32 %v, i32* %p
+  ret void
+}
+
+define void @write_back2(i32* %p) {
+; CHECK-LABEL: write_back2
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p unordered, align 4
+  store i32 %v, i32* %p
+  ret void
+}
+
+define void @write_back3(i32* %p) {
+; CHECK-LABEL: write_back3
+; CHECK-NEXT: ret
+  %v = load i32, i32* %p
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+}
+
+define void @write_back4(i32* %p) {
+; CHECK-LABEL: write_back4
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p unordered, align 4
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+}
+
+; Can't remove store due to ordering side effect
+define void @write_back5(i32* %p) {
+; CHECK-LABEL: write_back5
+; CHECK-NEXT: load
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p unordered, align 4
+  store atomic i32 %v, i32* %p seq_cst, align 4
+  ret void
+}
+
+define void @write_back6(i32* %p) {
+; CHECK-LABEL: write_back6
+; CHECK-NEXT: load
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p seq_cst, align 4
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+}
+
+define void @write_back7(i32* %p) {
+; CHECK-LABEL: write_back7
+; CHECK-NEXT: load
+; CHECK-NEXT: ret
+  %v = load atomic volatile i32, i32* %p seq_cst, align 4
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+}
+
 !0 = !{!4, !4, i64 0}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA"}
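
Note for readers of this patch: the change hinges on the difference between
StoreInst::isSimple() and StoreInst::isUnordered(). The sketch below
paraphrases the accessors in llvm/include/llvm/IR/Instructions.h; it is
shown for context only, is not part of the diff, and the exact spelling
may differ between revisions of the tree:

  // isSimple(): the access is neither atomic nor volatile.
  bool isSimple() const { return !isAtomic() && !isVolatile(); }

  // isUnordered(): the access is not volatile, and is either non-atomic
  // or atomic with 'unordered' ordering. Such a store has no ordering
  // side effects, so it is still legal to delete it as dead or to forward
  // a loaded value through it -- which is why the early bail-out above can
  // be relaxed from isSimple() to isUnordered(), while the not-yet-audited
  // code at the bottom of visitStoreInst still bails on anything
  // non-simple.
  bool isUnordered() const {
    return (getOrdering() == NotAtomic || getOrdering() == Unordered) &&
           !isVolatile();
  }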