diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -626,6 +626,20 @@
     setSubclassData(Ordering);
   }
 
+  /// Returns a single ordering which is at least as strong as both the
+  /// success and failure orderings for this cmpxchg.
+  AtomicOrdering getMergedOrdering() const {
+    if (getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
+      return AtomicOrdering::SequentiallyConsistent;
+    if (getFailureOrdering() == AtomicOrdering::Acquire) {
+      if (getSuccessOrdering() == AtomicOrdering::Monotonic)
+        return AtomicOrdering::Acquire;
+      if (getSuccessOrdering() == AtomicOrdering::Release)
+        return AtomicOrdering::AcquireRelease;
+    }
+    return getSuccessOrdering();
+  }
+
   /// Returns the synchronization scope ID of this cmpxchg instruction.
   SyncScope::ID getSyncScopeID() const {
     return SSID;
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -236,12 +236,13 @@
                TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                    TargetLoweringBase::AtomicExpansionKind::None &&
                (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
-                isAcquireOrStronger(CASI->getSuccessOrdering()))) {
+                isAcquireOrStronger(CASI->getSuccessOrdering()) ||
+                isAcquireOrStronger(CASI->getFailureOrdering()))) {
       // If a compare and swap is lowered to LL/SC, we can do smarter fence
       // insertion, with a stronger one on the success path than on the
       // failure path. As a result, fence insertion is directly done by
       // expandAtomicCmpXchg in that case.
-      FenceOrdering = CASI->getSuccessOrdering();
+      FenceOrdering = CASI->getMergedOrdering();
       CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
       CASI->setFailureOrdering(AtomicOrdering::Monotonic);
     }
@@ -1052,7 +1053,7 @@
                                  "NewVal_Shifted");
   Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
       Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
-      CI->getSuccessOrdering());
+      CI->getMergedOrdering());
   Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
   Value *Res = UndefValue::get(CI->getType());
   Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
@@ -1167,8 +1168,9 @@
   // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
   // should preserve the ordering.
   bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
-  AtomicOrdering MemOpOrder =
-      ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
+  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
+                                  ? AtomicOrdering::Monotonic
+                                  : CI->getMergedOrdering();
 
   // In implementations which use a barrier to achieve release semantics, we can
   // delay emitting this barrier until we know a store is actually going to be
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
@@ -53,9 +53,9 @@
 define void @val_compare_and_swap_monotonic_seqcst(i128* %p, i128 %oldval, i128 %newval) {
 ; CHECK-LLSC-O1-LABEL: val_compare_and_swap_monotonic_seqcst:
-; CHECK-LLSC-O1: ldxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+; CHECK-LLSC-O1: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
; [... LOTS of stuff that is generic IR unrelated to atomic operations ...]
-; CHECK-LLSC-O1: stxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+; CHECK-LLSC-O1: stlxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic_seqcst:
 ; CHECK-CAS-O1: caspal x2, x3, x4, x5, [x0]
@@ -76,7 +76,7 @@
 define void @val_compare_and_swap_release_acquire(i128* %p, i128 %oldval, i128 %newval) {
 ; CHECK-LLSC-O1-LABEL: val_compare_and_swap_release_acquire:
-; CHECK-LLSC-O1: ldxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+; CHECK-LLSC-O1: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
; [... LOTS of stuff that is generic IR unrelated to atomic operations ...]
 ; CHECK-LLSC-O1: stlxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 ;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -214,12 +214,12 @@
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:  LBB4_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
 ; CHECK-NOLSE-O1-NEXT:    b.ne LBB4_4
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
 ; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB4_1 Depth=1
-; CHECK-NOLSE-O1-NEXT:    stxr w9, x2, [x0]
+; CHECK-NOLSE-O1-NEXT:    stlxr w9, x2, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB4_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
 ; CHECK-NOLSE-O1-NEXT:    mov x0, x8
@@ -264,7 +264,7 @@
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:  LBB5_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
 ; CHECK-NOLSE-O1-NEXT:    b.ne LBB5_4
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
--- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
@@ -488,13 +488,16 @@
 ; PPC64LE-NEXT:  .LBB44_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB44_3
+; PPC64LE-NEXT:    bne 0, .LBB44_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB44_1
-; PPC64LE-NEXT:  .LBB44_3:
+; PPC64LE-NEXT:    bne 0, .LBB44_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB44_4:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release acquire
   ret void
@@ -706,13 +709,16 @@
 ; PPC64LE-NEXT:  .LBB54_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB54_3
+; PPC64LE-NEXT:    bne 0, .LBB54_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB54_1
-; PPC64LE-NEXT:  .LBB54_3:
+; PPC64LE-NEXT:    bne 0, .LBB54_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB54_4:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire
   ret void
@@ -919,13 +925,16 @@
 ; PPC64LE-NEXT:  .LBB64_1:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB64_3
+; PPC64LE-NEXT:    bne 0, .LBB64_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB64_1
-; PPC64LE-NEXT:  .LBB64_3:
+; PPC64LE-NEXT:    bne 0, .LBB64_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB64_4:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release acquire
   ret void
@@ -1127,13 +1136,16 @@
 ; PPC64LE-NEXT:  .LBB74_1:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB74_3
+; PPC64LE-NEXT:    bne 0, .LBB74_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB74_1
-; PPC64LE-NEXT:  .LBB74_3:
+; PPC64LE-NEXT:    bne 0, .LBB74_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB74_4:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release acquire
   ret void
@@ -1340,13 +1352,16 @@
 ; PPC64LE-NEXT:  .LBB84_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB84_3
+; PPC64LE-NEXT:    bne 0, .LBB84_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB84_1
-; PPC64LE-NEXT:  .LBB84_3:
+; PPC64LE-NEXT:    bne 0, .LBB84_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB84_4:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") release acquire
   ret void
@@ -1558,13 +1573,16 @@
 ; PPC64LE-NEXT:  .LBB94_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB94_3
+; PPC64LE-NEXT:    bne 0, .LBB94_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB94_1
-; PPC64LE-NEXT:  .LBB94_3:
+; PPC64LE-NEXT:    bne 0, .LBB94_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB94_4:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") release acquire
   ret void
@@ -1771,13 +1789,16 @@
 ; PPC64LE-NEXT:  .LBB104_1:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB104_3
+; PPC64LE-NEXT:    bne 0, .LBB104_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB104_1
-; PPC64LE-NEXT:  .LBB104_3:
+; PPC64LE-NEXT:    bne 0, .LBB104_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB104_4:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") release acquire
   ret void
@@ -1979,13 +2000,16 @@
 ; PPC64LE-NEXT:  .LBB114_1:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB114_3
+; PPC64LE-NEXT:    bne 0, .LBB114_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB114_1
-; PPC64LE-NEXT:  .LBB114_3:
+; PPC64LE-NEXT:    bne 0, .LBB114_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB114_4:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") release acquire
   ret void
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -337,7 +337,7 @@
 ; RV32IA-NEXT:    andi a2, a2, 255
 ; RV32IA-NEXT:    sll a0, a2, a0
 ; RV32IA-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a2, (a3)
+; RV32IA-NEXT:    lr.w.aq a2, (a3)
 ; RV32IA-NEXT:    and a5, a2, a4
 ; RV32IA-NEXT:    bne a5, a1, .LBB4_3
 ; RV32IA-NEXT:    # %bb.2: # in Loop: Header=BB4_1 Depth=1
@@ -373,7 +373,7 @@
 ; RV64IA-NEXT:    andi a2, a2, 255
 ; RV64IA-NEXT:    sllw a0, a2, a0
 ; RV64IA-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a2, (a3)
+; RV64IA-NEXT:    lr.w.aq a2, (a3)
 ; RV64IA-NEXT:    and a5, a2, a4
 ; RV64IA-NEXT:    bne a5, a1, .LBB4_3
 ; RV64IA-NEXT:    # %bb.2: # in Loop: Header=BB4_1 Depth=1
@@ -1106,7 +1106,7 @@
 ; RV32IA-NEXT:    and a2, a2, a4
 ; RV32IA-NEXT:    sll a0, a2, a0
 ; RV32IA-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a2, (a3)
+; RV32IA-NEXT:    lr.w.aq a2, (a3)
 ; RV32IA-NEXT:    and a4, a2, a5
 ; RV32IA-NEXT:    bne a4, a1, .LBB14_3
 ; RV32IA-NEXT:    # %bb.2: # in Loop: Header=BB14_1 Depth=1
@@ -1143,7 +1143,7 @@
 ; RV64IA-NEXT:    and a2, a2, a4
 ; RV64IA-NEXT:    sllw a0, a2, a0
 ; RV64IA-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a2, (a3)
+; RV64IA-NEXT:    lr.w.aq a2, (a3)
 ; RV64IA-NEXT:    and a4, a2, a5
 ; RV64IA-NEXT:    bne a4, a1, .LBB14_3
 ; RV64IA-NEXT:    # %bb.2: # in Loop: Header=BB14_1 Depth=1
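
Editor's note, not part of the patch: the merge rule added as AtomicCmpXchgInst::getMergedOrdering() above is what drives the updated test expectations. For example, a cmpxchg with success ordering release and failure ordering acquire is now fenced and lowered as if it were acq_rel (acquiring loads such as ldaxr/ldaxp in the AArch64 LL/SC loops, a trailing lwsync on both exit paths on PPC64LE, lr.w.aq on RISC-V). The sketch below restates that rule as standalone C++ so it can be compiled and sanity-checked in isolation; the Ordering enum and mergedOrdering() here are local stand-ins for llvm::AtomicOrdering and the new member function, not LLVM API.

// Standalone sketch (assumption: mirrors the case analysis of the
// getMergedOrdering() hunk above, outside the LLVM headers).
#include <cassert>

enum class Ordering {
  Monotonic,
  Acquire,
  Release,
  AcquireRelease,
  SequentiallyConsistent
};

// Returns an ordering at least as strong as both Success and Failure.
static Ordering mergedOrdering(Ordering Success, Ordering Failure) {
  if (Failure == Ordering::SequentiallyConsistent)
    return Ordering::SequentiallyConsistent;
  if (Failure == Ordering::Acquire) {
    if (Success == Ordering::Monotonic)
      return Ordering::Acquire;
    if (Success == Ordering::Release)
      return Ordering::AcquireRelease;
  }
  // In the remaining valid combinations the success ordering already
  // subsumes the failure ordering (cmpxchg failure orderings are never
  // release or acq_rel in LLVM IR).
  return Success;
}

int main() {
  // "release acquire", as in the PPC64LE and RISC-V tests above: merged to
  // acq_rel, hence the acquire-flavoured load and the trailing barrier.
  assert(mergedOrdering(Ordering::Release, Ordering::Acquire) ==
         Ordering::AcquireRelease);
  // "monotonic seq_cst", as in val_compare_and_swap_monotonic_seqcst: the
  // failure ordering dominates.
  assert(mergedOrdering(Ordering::Monotonic,
                        Ordering::SequentiallyConsistent) ==
         Ordering::SequentiallyConsistent);
  // When the success ordering is already the stronger one, it is returned
  // unchanged.
  assert(mergedOrdering(Ordering::SequentiallyConsistent,
                        Ordering::Acquire) ==
         Ordering::SequentiallyConsistent);
  return 0;
}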