Index: llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -424,6 +424,16 @@
     return false;
   };
 
+  // Return "true" if and only if the instruction I is either a non-unordered
+  // load or a non-unordered store.
+  auto isNonUnorderedLoadOrStore = [](Instruction *I) -> bool {
+    if (auto *LI = dyn_cast<LoadInst>(I))
+      return !LI->isUnordered();
+    if (auto *SI = dyn_cast<StoreInst>(I))
+      return !SI->isUnordered();
+    return false;
+  };
+
   // Return "true" if I is not a load and not a store, but it does access
   // memory.
   auto isOtherMemAccess = [](Instruction *I) -> bool {
@@ -549,11 +559,18 @@
       // A Monotonic store is OK if the query inst is itself not atomic.
       // FIXME: This is overly conservative.
      if (!SI->isUnordered() && SI->isAtomic()) {
-        if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
+        if (!QueryInst || isNonUnorderedLoadOrStore(QueryInst) ||
             isOtherMemAccess(QueryInst))
           return MemDepResult::getClobber(SI);
-        if (SI->getOrdering() != AtomicOrdering::Monotonic)
-          return MemDepResult::getClobber(SI);
+        // OK, if we get here the guard above guarantees that QueryInst is a
+        // non-atomic or unordered load/store, and SI is an atomic store with
+        // monotonic or release semantics (a seq_cst store is a release store
+        // plus a total order over other seq_cst instructions; since QueryInst
+        // is not seq_cst we can treat SI as a plain release store here).
+        // Monotonic and release semantics allow the query access to be
+        // reordered before the store, so it is safe to go on and check the
+        // aliasing below; a clobber is still reported if the locations may
+        // or must alias.
       }
 
       // While volatile access cannot be eliminated, they do not have to clobber
Index: llvm/test/Analysis/MemoryDependenceAnalysis/reorder-over-store-atomic.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/MemoryDependenceAnalysis/reorder-over-store-atomic.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -gvn -basic-aa < %s | FileCheck %s
+
+@u = global i32 5, align 4
+@w = global i32 10, align 4
+
+define i32 @test_load() {
+; CHECK-LABEL: @test_load(
+; CHECK-NEXT:    [[L1:%.*]] = load atomic i32, i32* @w unordered, align 4
+; CHECK-NEXT:    [[LV:%.*]] = load atomic i32, i32* @u seq_cst, align 4
+; CHECK-NEXT:    [[L2:%.*]] = load atomic i32, i32* @w unordered, align 4
+; CHECK-NEXT:    [[RES_1:%.*]] = sub i32 [[L1]], [[L2]]
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[RES_1]], [[LV]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %l1 = load atomic i32, i32* @w unordered, align 4
+  %lv = load atomic i32, i32* @u seq_cst, align 4
+  %l2 = load atomic i32, i32* @w unordered, align 4
+  %res.1 = sub i32 %l1, %l2
+  %res = add i32 %res.1, %lv
+  ret i32 %res
+}
+
+define i32 @test_store(i32 %x) {
+; CHECK-LABEL: @test_store(
+; CHECK-NEXT:    store atomic i32 [[X:%.*]], i32* @u seq_cst, align 4
+; CHECK-NEXT:    ret i32 0
+;
+  %l1 = load atomic i32, i32* @w unordered, align 4
+  store atomic i32 %x, i32* @u seq_cst, align 4
+  %l2 = load atomic i32, i32* @w unordered, align 4
+  %res = sub i32 %l1, %l2
+  ret i32 %res
+}
+
+define i32 @test_store_store(i32 %x) {
+; CHECK-LABEL: @test_store_store(
+; CHECK-NEXT:    store atomic i32 [[X:%.*]], i32* @w unordered, align 4
+; CHECK-NEXT:    store atomic i32 [[X]], i32* @u release, align 4
+; CHECK-NEXT:    store atomic i32 0, i32* @w unordered, align 4
+; CHECK-NEXT:    ret i32 0
+;
+  store atomic i32 %x, i32* @w unordered, align 4
+  store atomic i32 %x, i32* @u release, align 4
+  store atomic i32 0, i32* @w unordered, align 4
+  ret i32 0
+}
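
Aside (not part of the patch): a minimal standalone sketch of how the new isNonUnorderedLoadOrStore predicate classifies accesses with different orderings. The main() driver, the inline IR string, and the printed labels are illustrative assumptions; only dyn_cast, LoadInst::isUnordered(), and StoreInst::isUnordered() are the actual LLVM APIs the patch relies on, and the typed-pointer IR matches the era of the test above.

// Sketch only: parse a few accesses and report which ones the new predicate
// would still treat as clobbers for an atomic query instruction.
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Same logic as the lambda added to MemoryDependenceAnalysis.cpp.
static bool isNonUnorderedLoadOrStore(Instruction *I) {
  if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  return false;
}

int main() {
  LLVMContext Ctx;
  SMDiagnostic Err;
  // Hypothetical module: one memory access per ordering of interest.
  std::unique_ptr<Module> M = parseAssemblyString(
      "@g = global i32 0\n"
      "define void @f(i32 %x) {\n"
      "  %a = load i32, i32* @g\n"
      "  %b = load atomic i32, i32* @g unordered, align 4\n"
      "  %c = load atomic i32, i32* @g monotonic, align 4\n"
      "  store atomic i32 %x, i32* @g seq_cst, align 4\n"
      "  ret void\n"
      "}\n",
      Err, Ctx);
  if (!M)
    return 1;
  for (Instruction &I : M->getFunction("f")->getEntryBlock()) {
    if (!I.mayReadOrWriteMemory())
      continue;
    // Expected: the plain and unordered loads print "unordered", while the
    // monotonic load and the seq_cst store print "non-unordered".
    errs() << (isNonUnorderedLoadOrStore(&I) ? "non-unordered:" : "unordered:    ")
           << I << "\n";
  }
  return 0;
}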