Index: llvm/lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -434,6 +434,10 @@
   Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
   Constant *DummyVal = Constant::getNullValue(Ty);
 
+  // RMW instructions aren't allowed to be unordered in the LangRef.
+  if (Order == AtomicOrdering::Unordered)
+    Order = AtomicOrdering::Monotonic;
+
   Value *Pair = Builder.CreateAtomicCmpXchg(
       Addr, DummyVal, DummyVal, Order,
       AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
@@ -482,9 +486,15 @@
   // It is the responsibility of the target to only signal expansion via
   // shouldExpandAtomicRMW in cases where this is required and possible.
   IRBuilder<> Builder(SI);
+  AtomicOrdering Order = SI->getOrdering();
+
+  // RMW instructions aren't allowed to be unordered in the LangRef.
+  if (Order == AtomicOrdering::Unordered)
+    Order = AtomicOrdering::Monotonic;
+
   AtomicRMWInst *AI =
       Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
-                              SI->getValueOperand(), SI->getOrdering());
+                              SI->getValueOperand(), Order);
   SI->eraseFromParent();
 
   // Now we have an appropriate swap instruction, lower it as usual.
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11691,7 +11691,15 @@
 TargetLowering::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   unsigned Size = LI->getType()->getPrimitiveSizeInBits();
-  return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
+
+  if (Size != 128)
+    return AtomicExpansionKind::None;
+
+  // If available, prefer CAS: the code is larger than an LDXR/STXR loop, but
+  // it avoids the starvation issues that CAS was (at least partly) added to
+  // solve, so it's probably better to avoid LDXR across the board.
+  return Subtarget->hasLSE() ? AtomicExpansionKind::CmpXChg
+                             : AtomicExpansionKind::LLSC;
 }
 
 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
@@ -11702,10 +11710,16 @@
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   if (Size > 128)
     return AtomicExpansionKind::None;
-  // Nand not supported in LSE.
-  if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
-  // Leave 128 bits to LLSC.
-  return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
+
+  if (!Subtarget->hasLSE())
+    return AtomicExpansionKind::LLSC;
+
+  // If available, prefer CAS: the code is larger than an LDXR/STXR loop, but
+  // it avoids the starvation issues that CAS was (at least partly) added to
+  // solve, so it's probably better to avoid LDXR across the board.
+  if (Size == 128 || AI->getOperation() == AtomicRMWInst::Nand)
+    return AtomicExpansionKind::CmpXChg;
+
+  return AtomicExpansionKind::None;
 }
 
 TargetLowering::AtomicExpansionKind
Index: llvm/test/Transforms/AtomicExpand/AArch64/expand-atomic-i128.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/AtomicExpand/AArch64/expand-atomic-i128.ll
@@ -0,0 +1,224 @@
+; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s --check-prefixes=CHECK,LDX
+; RUN: opt -S -mtriple=aarch64-- -atomic-expand -mattr=+lse %s | FileCheck %s --check-prefixes=CHECK,CAS
+
+; Use CAS if available, and LL/SC otherwise.
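+;
+; Illustrative example (commentary only, not a FileCheck'd case): with
+; -mattr=+lse, an acquire i128 load such as
+;   %val = load atomic i128, i128* %ptr acquire, align 16
+; is rewritten by -atomic-expand into a degenerate compare-exchange:
+;   %pair = cmpxchg i128* %ptr, i128 0, i128 0 acquire acquire
+;   %val = extractvalue { i128, i1 } %pair, 0
+; The test_unordered_* functions below additionally check that unordered
+; accesses are promoted to monotonic, since the LangRef does not allow
+; unordered cmpxchg or atomicrmw instructions.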
+
+define i128 @test_atomicrmw_and_i128(i128* %ptr, i64, i128 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_and_i128(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP2:%.*]] = bitcast i128* [[PTR:%.*]] to i8*
+; LDX-NEXT:    [[LOHI:%.*]] = call { i64, i64 } @llvm.aarch64.ldaxp(i8* [[TMP2]])
+; LDX-NEXT:    [[LO:%.*]] = extractvalue { i64, i64 } [[LOHI]], 0
+; LDX-NEXT:    [[HI:%.*]] = extractvalue { i64, i64 } [[LOHI]], 1
+; LDX-NEXT:    [[LO64:%.*]] = zext i64 [[LO]] to i128
+; LDX-NEXT:    [[HI64:%.*]] = zext i64 [[HI]] to i128
+; LDX-NEXT:    [[TMP3:%.*]] = shl i128 [[HI64]], 64
+; LDX-NEXT:    [[VAL64:%.*]] = or i128 [[LO64]], [[TMP3]]
+; LDX-NEXT:    [[NEW:%.*]] = and i128 [[VAL64]], [[V:%.*]]
+; LDX-NEXT:    [[LO1:%.*]] = trunc i128 [[NEW]] to i64
+; LDX-NEXT:    [[TMP4:%.*]] = lshr i128 [[NEW]], 64
+; LDX-NEXT:    [[HI2:%.*]] = trunc i128 [[TMP4]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = bitcast i128* [[PTR]] to i8*
+; LDX-NEXT:    [[TMP6:%.*]] = call i32 @llvm.aarch64.stxp(i64 [[LO1]], i64 [[HI2]], i8* [[TMP5]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i128 [[VAL64]]
+;
+; CAS-LABEL: @test_atomicrmw_and_i128(
+; CAS-NEXT:    [[TMP2:%.*]] = load i128, i128* [[PTR:%.*]], align 16
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP1:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[NEW:%.*]] = and i128 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i128* [[PTR]], i128 [[LOADED]], i128 [[NEW]] acquire acquire
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i128 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw and i128* %ptr, i128 %v acquire
+  ret i128 %tmp0
+}
+
+define i128 @test_atomicrmw_add_i128(i128* %ptr, i64, i128 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_add_i128(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP2:%.*]] = bitcast i128* [[PTR:%.*]] to i8*
+; LDX-NEXT:    [[LOHI:%.*]] = call { i64, i64 } @llvm.aarch64.ldaxp(i8* [[TMP2]])
+; LDX-NEXT:    [[LO:%.*]] = extractvalue { i64, i64 } [[LOHI]], 0
+; LDX-NEXT:    [[HI:%.*]] = extractvalue { i64, i64 } [[LOHI]], 1
+; LDX-NEXT:    [[LO64:%.*]] = zext i64 [[LO]] to i128
+; LDX-NEXT:    [[HI64:%.*]] = zext i64 [[HI]] to i128
+; LDX-NEXT:    [[TMP3:%.*]] = shl i128 [[HI64]], 64
+; LDX-NEXT:    [[VAL64:%.*]] = or i128 [[LO64]], [[TMP3]]
+; LDX-NEXT:    [[NEW:%.*]] = add i128 [[VAL64]], [[V:%.*]]
+; LDX-NEXT:    [[LO1:%.*]] = trunc i128 [[NEW]] to i64
+; LDX-NEXT:    [[TMP4:%.*]] = lshr i128 [[NEW]], 64
+; LDX-NEXT:    [[HI2:%.*]] = trunc i128 [[TMP4]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = bitcast i128* [[PTR]] to i8*
+; LDX-NEXT:    [[TMP6:%.*]] = call i32 @llvm.aarch64.stlxp(i64 [[LO1]], i64 [[HI2]], i8* [[TMP5]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i128 [[VAL64]]
+;
+; CAS-LABEL: @test_atomicrmw_add_i128(
+; CAS-NEXT:    [[TMP2:%.*]] = load i128, i128* [[PTR:%.*]], align 16
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP1:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[NEW:%.*]] = add i128 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i128* [[PTR]], i128 [[LOADED]], i128 [[NEW]] seq_cst seq_cst
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i128 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw add i128* %ptr, i128 %v seq_cst
+  ret i128 %tmp0
+}
+
+define i128 @test_atomicrmw_nand_i128(i128* %ptr, i64, i128 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i128(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP2:%.*]] = bitcast i128* [[PTR:%.*]] to i8*
+; LDX-NEXT:    [[LOHI:%.*]] = call { i64, i64 } @llvm.aarch64.ldxp(i8* [[TMP2]])
+; LDX-NEXT:    [[LO:%.*]] = extractvalue { i64, i64 } [[LOHI]], 0
+; LDX-NEXT:    [[HI:%.*]] = extractvalue { i64, i64 } [[LOHI]], 1
+; LDX-NEXT:    [[LO64:%.*]] = zext i64 [[LO]] to i128
+; LDX-NEXT:    [[HI64:%.*]] = zext i64 [[HI]] to i128
+; LDX-NEXT:    [[TMP3:%.*]] = shl i128 [[HI64]], 64
+; LDX-NEXT:    [[VAL64:%.*]] = or i128 [[LO64]], [[TMP3]]
+; LDX-NEXT:    [[TMP4:%.*]] = and i128 [[VAL64]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i128 [[TMP4]], -1
+; LDX-NEXT:    [[LO1:%.*]] = trunc i128 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = lshr i128 [[NEW]], 64
+; LDX-NEXT:    [[HI2:%.*]] = trunc i128 [[TMP5]] to i64
+; LDX-NEXT:    [[TMP6:%.*]] = bitcast i128* [[PTR]] to i8*
+; LDX-NEXT:    [[TMP7:%.*]] = call i32 @llvm.aarch64.stxp(i64 [[LO1]], i64 [[HI2]], i8* [[TMP6]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP7]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i128 [[VAL64]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i128(
+; CAS-NEXT:    [[TMP2:%.*]] = load i128, i128* [[PTR:%.*]], align 16
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP1:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP3:%.*]] = and i128 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i128 [[TMP3]], -1
+; CAS-NEXT:    [[TMP4:%.*]] = cmpxchg i128* [[PTR]], i128 [[LOADED]], i128 [[NEW]] monotonic monotonic
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP4]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i128 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i128* %ptr, i128 %v monotonic
+  ret i128 %tmp0
+}
+
+define i128 @test_atomic_load_i128(i128* %ptr) nounwind {
+; LDX-LABEL: @test_atomic_load_i128(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = bitcast i128* [[PTR:%.*]] to i8*
+; LDX-NEXT:    [[LOHI:%.*]] = call { i64, i64 } @llvm.aarch64.ldaxp(i8* [[TMP1]])
+; LDX-NEXT:    [[LO:%.*]] = extractvalue { i64, i64 } [[LOHI]], 0
+; LDX-NEXT:    [[HI:%.*]] = extractvalue { i64, i64 } [[LOHI]], 1
+; LDX-NEXT:    [[LO64:%.*]] = zext i64 [[LO]] to i128
+; LDX-NEXT:    [[HI64:%.*]] = zext i64 [[HI]] to i128
+; LDX-NEXT:    [[TMP2:%.*]] = shl i128 [[HI64]], 64
+; LDX-NEXT:    [[VAL64:%.*]] = or i128 [[LO64]], [[TMP2]]
+; LDX-NEXT:    [[LO1:%.*]] = trunc i128 [[VAL64]] to i64
+; LDX-NEXT:    [[TMP3:%.*]] = lshr i128 [[VAL64]], 64
+; LDX-NEXT:    [[HI2:%.*]] = trunc i128 [[TMP3]] to i64
+; LDX-NEXT:    [[TMP4:%.*]] = bitcast i128* [[PTR]] to i8*
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stxp(i64 [[LO1]], i64 [[HI2]], i8* [[TMP4]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i128 [[VAL64]]
+;
+; CAS-LABEL: @test_atomic_load_i128(
+; CAS-NEXT:    [[TMP1:%.*]] = cmpxchg i128* [[PTR:%.*]], i128 0, i128 0 acquire acquire
+; CAS-NEXT:    [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CAS-NEXT:    ret i128 [[LOADED]]
+;
+  %tmp0 = load atomic i128, i128* %ptr acquire, align 16
+  ret i128 %tmp0
+}
+
+define void @test_atomic_store_i128(i128* %ptr, i64, i128 %val) nounwind {
+; LDX-LABEL: @test_atomic_store_i128(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP2:%.*]] = bitcast i128* [[PTR:%.*]] to i8*
+; LDX-NEXT:    [[LOHI:%.*]] = call { i64, i64 } @llvm.aarch64.ldxp(i8* [[TMP2]])
+; LDX-NEXT:    [[LO:%.*]] = extractvalue { i64, i64 } [[LOHI]], 0
+; LDX-NEXT:    [[HI:%.*]] = extractvalue { i64, i64 } [[LOHI]], 1
+; LDX-NEXT:    [[LO64:%.*]] = zext i64 [[LO]] to i128
+; LDX-NEXT:    [[HI64:%.*]] = zext i64 [[HI]] to i128
+; LDX-NEXT:    [[TMP3:%.*]] = shl i128 [[HI64]], 64
+; LDX-NEXT:    [[VAL64:%.*]] = or i128 [[LO64]], [[TMP3]]
+; LDX-NEXT:    [[LO1:%.*]] = trunc i128 [[VAL:%.*]] to i64
+; LDX-NEXT:    [[TMP4:%.*]] = lshr i128 [[VAL]], 64
+; LDX-NEXT:    [[HI2:%.*]] = trunc i128 [[TMP4]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = bitcast i128* [[PTR]] to i8*
+; LDX-NEXT:    [[TMP6:%.*]] = call i32 @llvm.aarch64.stlxp(i64 [[LO1]], i64 [[HI2]], i8* [[TMP5]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret void
+;
+; CAS-LABEL: @test_atomic_store_i128(
+; CAS-NEXT:    [[TMP2:%.*]] = load i128, i128* [[PTR:%.*]], align 16
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i128 [ [[TMP2]], [[TMP1:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i128* [[PTR]], i128 [[LOADED]], i128 [[VAL:%.*]] release monotonic
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i128, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret void
+;
+  store atomic i128 %val, i128* %ptr release, align 16
+  ret void
+}
+
+define i128 @test_unordered_load(i128* %ptr) {
+; CAS-LABEL: @test_unordered_load(
+; CAS: [[PAIR:%.*]] = cmpxchg i128* %ptr, i128 0, i128 0 monotonic monotonic
+; CAS-NEXT: [[VAL:%.*]] = extractvalue { i128, i1 } [[PAIR]], 0
+; CAS-NEXT: ret i128 [[VAL]]
+
+  %val = load atomic i128, i128* %ptr unordered, align 16
+  ret i128 %val
+}
+
+define void @test_unordered_store(i128 %val, i128* %ptr) {
+; CAS-LABEL: @test_unordered_store(
+; CAS: [[TRY:%.*]] = load i128, i128* %ptr, align 16
+; CAS-NEXT: br label %[[LOOP:.*]]
+
+; CAS: [[LOOP]]:
+; CAS-NEXT: [[LOADED:%.*]] = phi i128 [ [[TRY]], %0 ], [ [[NEW:%.*]], %[[LOOP]] ]
+; CAS-NEXT: [[PAIR:%.*]] = cmpxchg i128* %ptr, i128 [[LOADED]], i128 %val monotonic monotonic
+; CAS-NEXT: [[SUCCESS:%.*]] = extractvalue { i128, i1 } [[PAIR]], 1
+; CAS-NEXT: [[NEW]] = extractvalue { i128, i1 } [[PAIR]], 0
+; CAS-NEXT: br i1 [[SUCCESS]], label %[[DONE:.*]], label %[[LOOP]]
+
+; CAS: [[DONE]]:
+; CAS-NEXT: ret void
+
+  store atomic i128 %val, i128* %ptr unordered, align 16
+  ret void
+}
Index: llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-nand.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-nand.ll
@@ -0,0 +1,655 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s --check-prefixes=CHECK,LDX
+; RUN: opt -S -mtriple=aarch64-- -atomic-expand -mattr=+lse %s | FileCheck %s --check-prefixes=CHECK,CAS
+
+; Use CAS if available, and LL/SC otherwise.
+
+define i8 @test_atomicrmw_nand_i8_monotonic(i8* %ap, i8 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i8_monotonic(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldxr.p0i8(i8* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i8
+; LDX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i8 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i8 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stxr.p0i8(i64 [[TMP4]], i8* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i8 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i8_monotonic(
+; CAS-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AP:%.*]], align 1
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i8 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i8 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i8* [[AP]], i8 [[LOADED]], i8 [[NEW]] monotonic monotonic
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i8, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i8, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i8 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i8* %ap, i8 %v monotonic
+  ret i8 %tmp0
+}
+
+define i8 @test_atomicrmw_nand_i8_acquire(i8* %ap, i8 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i8_acquire(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i8(i8* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i8
+; LDX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i8 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i8 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stxr.p0i8(i64 [[TMP4]], i8* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i8 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i8_acquire(
+; CAS-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AP:%.*]], align 1
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i8 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i8 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i8* [[AP]], i8 [[LOADED]], i8 [[NEW]] acquire acquire
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i8, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i8, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i8 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i8* %ap, i8 %v acquire
+  ret i8 %tmp0
+}
+
+define i8 @test_atomicrmw_nand_i8_release(i8* %ap, i8 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i8_release(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldxr.p0i8(i8* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i8
+; LDX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i8 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i8 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stlxr.p0i8(i64 [[TMP4]], i8* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i8 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i8_release(
+; CAS-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AP:%.*]], align 1
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i8 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i8 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i8* [[AP]], i8 [[LOADED]], i8 [[NEW]] release monotonic
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i8, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i8, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i8 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i8* %ap, i8 %v release
+  ret i8 %tmp0
+}
+
+define i8 @test_atomicrmw_nand_i8_acq_rel(i8* %ap, i8 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i8_acq_rel(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i8(i8* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i8
+; LDX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i8 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i8 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stlxr.p0i8(i64 [[TMP4]], i8* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i8 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i8_acq_rel(
+; CAS-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AP:%.*]], align 1
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i8 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i8 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i8* [[AP]], i8 [[LOADED]], i8 [[NEW]] acq_rel acquire
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i8, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i8, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i8 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i8* %ap, i8 %v acq_rel
+  ret i8 %tmp0
+}
+
+define i8 @test_atomicrmw_nand_i8_seq_cst(i8* %ap, i8 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i8_seq_cst(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i8(i8* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i8
+; LDX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i8 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i8 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stlxr.p0i8(i64 [[TMP4]], i8* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i8 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i8_seq_cst(
+; CAS-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AP:%.*]], align 1
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i8 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i8 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i8* [[AP]], i8 [[LOADED]], i8 [[NEW]] seq_cst seq_cst
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i8, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i8, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i8 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i8* %ap, i8 %v seq_cst
+  ret i8 %tmp0
+}
+
+define i16 @test_atomicrmw_nand_i16_monotonic(i16* %ap, i16 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i16_monotonic(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldxr.p0i16(i16* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
+; LDX-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i16 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i16 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stxr.p0i16(i64 [[TMP4]], i16* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i16 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i16_monotonic(
+; CAS-NEXT:    [[TMP1:%.*]] = load i16, i16* [[AP:%.*]], align 2
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i16 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i16 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i16 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i16* [[AP]], i16 [[LOADED]], i16 [[NEW]] monotonic monotonic
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i16, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i16 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i16* %ap, i16 %v monotonic
+  ret i16 %tmp0
+}
+
+define i16 @test_atomicrmw_nand_i16_acquire(i16* %ap, i16 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i16_acquire(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i16(i16* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
+; LDX-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i16 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i16 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stxr.p0i16(i64 [[TMP4]], i16* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i16 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i16_acquire(
+; CAS-NEXT:    [[TMP1:%.*]] = load i16, i16* [[AP:%.*]], align 2
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i16 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i16 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i16 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i16* [[AP]], i16 [[LOADED]], i16 [[NEW]] acquire acquire
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i16, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i16 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i16* %ap, i16 %v acquire
+  ret i16 %tmp0
+}
+
+define i16 @test_atomicrmw_nand_i16_release(i16* %ap, i16 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i16_release(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldxr.p0i16(i16* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
+; LDX-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i16 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i16 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stlxr.p0i16(i64 [[TMP4]], i16* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i16 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i16_release(
+; CAS-NEXT:    [[TMP1:%.*]] = load i16, i16* [[AP:%.*]], align 2
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i16 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i16 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i16 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i16* [[AP]], i16 [[LOADED]], i16 [[NEW]] release monotonic
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i16, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i16 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i16* %ap, i16 %v release
+  ret i16 %tmp0
+}
+
+define i16 @test_atomicrmw_nand_i16_acq_rel(i16* %ap, i16 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i16_acq_rel(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i16(i16* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
+; LDX-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i16 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i16 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stlxr.p0i16(i64 [[TMP4]], i16* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i16 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i16_acq_rel(
+; CAS-NEXT:    [[TMP1:%.*]] = load i16, i16* [[AP:%.*]], align 2
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i16 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i16 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i16 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i16* [[AP]], i16 [[LOADED]], i16 [[NEW]] acq_rel acquire
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i16, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i16 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i16* %ap, i16 %v acq_rel
+  ret i16 %tmp0
+}
+
+define i16 @test_atomicrmw_nand_i16_seq_cst(i16* %ap, i16 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i16_seq_cst(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i16(i16* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
+; LDX-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i16 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i16 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stlxr.p0i16(i64 [[TMP4]], i16* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i16 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i16_seq_cst(
+; CAS-NEXT:    [[TMP1:%.*]] = load i16, i16* [[AP:%.*]], align 2
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i16 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i16 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i16 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i16* [[AP]], i16 [[LOADED]], i16 [[NEW]] seq_cst seq_cst
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i16, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i16 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i16* %ap, i16 %v seq_cst
+  ret i16 %tmp0
+}
+
+define i32 @test_atomicrmw_nand_i32_monotonic(i32* %ap, i32 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i32_monotonic(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldxr.p0i32(i32* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; LDX-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i32 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i32 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stxr.p0i32(i64 [[TMP4]], i32* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i32 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i32_monotonic(
+; CAS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[AP:%.*]], align 4
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i32 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i32 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i32* [[AP]], i32 [[LOADED]], i32 [[NEW]] monotonic monotonic
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i32 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i32* %ap, i32 %v monotonic
+  ret i32 %tmp0
+}
+
+define i32 @test_atomicrmw_nand_i32_acquire(i32* %ap, i32 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i32_acquire(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i32(i32* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; LDX-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i32 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i32 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stxr.p0i32(i64 [[TMP4]], i32* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i32 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i32_acquire(
+; CAS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[AP:%.*]], align 4
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i32 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i32 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i32* [[AP]], i32 [[LOADED]], i32 [[NEW]] acquire acquire
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i32 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i32* %ap, i32 %v acquire
+  ret i32 %tmp0
+}
+
+define i32 @test_atomicrmw_nand_i32_release(i32* %ap, i32 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i32_release(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldxr.p0i32(i32* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; LDX-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i32 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i32 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stlxr.p0i32(i64 [[TMP4]], i32* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i32 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i32_release(
+; CAS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[AP:%.*]], align 4
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i32 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i32 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i32* [[AP]], i32 [[LOADED]], i32 [[NEW]] release monotonic
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i32 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i32* %ap, i32 %v release
+  ret i32 %tmp0
+}
+
+define i32 @test_atomicrmw_nand_i32_acq_rel(i32* %ap, i32 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i32_acq_rel(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i32(i32* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; LDX-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i32 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i32 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stlxr.p0i32(i64 [[TMP4]], i32* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i32 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i32_acq_rel(
+; CAS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[AP:%.*]], align 4
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i32 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i32 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i32* [[AP]], i32 [[LOADED]], i32 [[NEW]] acq_rel acquire
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i32 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i32* %ap, i32 %v acq_rel
+  ret i32 %tmp0
+}
+
+define i32 @test_atomicrmw_nand_i32_seq_cst(i32* %ap, i32 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i32_seq_cst(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i32(i32* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; LDX-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i32 [[TMP3]], -1
+; LDX-NEXT:    [[TMP4:%.*]] = zext i32 [[NEW]] to i64
+; LDX-NEXT:    [[TMP5:%.*]] = call i32 @llvm.aarch64.stlxr.p0i32(i64 [[TMP4]], i32* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i32 [[TMP2]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i32_seq_cst(
+; CAS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[AP:%.*]], align 4
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i32 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i32 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i32* [[AP]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i32 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i32* %ap, i32 %v seq_cst
+  ret i32 %tmp0
+}
+
+define i64 @test_atomicrmw_nand_i64_monotonic(i64* %ap, i64 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i64_monotonic(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldxr.p0i64(i64* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
+; LDX-NEXT:    [[TMP3:%.*]] = call i32 @llvm.aarch64.stxr.p0i64(i64 [[NEW]], i64* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP3]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i64 [[TMP1]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i64_monotonic(
+; CAS-NEXT:    [[TMP1:%.*]] = load i64, i64* [[AP:%.*]], align 8
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i64 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i64* [[AP]], i64 [[LOADED]], i64 [[NEW]] monotonic monotonic
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i64 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i64* %ap, i64 %v monotonic
+  ret i64 %tmp0
+}
+
+define i64 @test_atomicrmw_nand_i64_acquire(i64* %ap, i64 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i64_acquire(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i64(i64* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
+; LDX-NEXT:    [[TMP3:%.*]] = call i32 @llvm.aarch64.stxr.p0i64(i64 [[NEW]], i64* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP3]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i64 [[TMP1]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i64_acquire(
+; CAS-NEXT:    [[TMP1:%.*]] = load i64, i64* [[AP:%.*]], align 8
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i64 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i64* [[AP]], i64 [[LOADED]], i64 [[NEW]] acquire acquire
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i64 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i64* %ap, i64 %v acquire
+  ret i64 %tmp0
+}
+
+define i64 @test_atomicrmw_nand_i64_release(i64* %ap, i64 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i64_release(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldxr.p0i64(i64* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
+; LDX-NEXT:    [[TMP3:%.*]] = call i32 @llvm.aarch64.stlxr.p0i64(i64 [[NEW]], i64* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP3]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i64 [[TMP1]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i64_release(
+; CAS-NEXT:    [[TMP1:%.*]] = load i64, i64* [[AP:%.*]], align 8
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i64 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i64* [[AP]], i64 [[LOADED]], i64 [[NEW]] release monotonic
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i64 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i64* %ap, i64 %v release
+  ret i64 %tmp0
+}
+
+define i64 @test_atomicrmw_nand_i64_acq_rel(i64* %ap, i64 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i64_acq_rel(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i64(i64* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
+; LDX-NEXT:    [[TMP3:%.*]] = call i32 @llvm.aarch64.stlxr.p0i64(i64 [[NEW]], i64* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP3]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i64 [[TMP1]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i64_acq_rel(
+; CAS-NEXT:    [[TMP1:%.*]] = load i64, i64* [[AP:%.*]], align 8
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i64 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i64* [[AP]], i64 [[LOADED]], i64 [[NEW]] acq_rel acquire
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i64 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i64* %ap, i64 %v acq_rel
+  ret i64 %tmp0
+}
+
+define i64 @test_atomicrmw_nand_i64_seq_cst(i64* %ap, i64 %v) nounwind {
+; LDX-LABEL: @test_atomicrmw_nand_i64_seq_cst(
+; LDX-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; LDX:       atomicrmw.start:
+; LDX-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i64(i64* [[AP:%.*]])
+; LDX-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[V:%.*]]
+; LDX-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
+; LDX-NEXT:    [[TMP3:%.*]] = call i32 @llvm.aarch64.stlxr.p0i64(i64 [[NEW]], i64* [[AP]])
+; LDX-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP3]], 0
+; LDX-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; LDX:       atomicrmw.end:
+; LDX-NEXT:    ret i64 [[TMP1]]
+;
+; CAS-LABEL: @test_atomicrmw_nand_i64_seq_cst(
+; CAS-NEXT:    [[TMP1:%.*]] = load i64, i64* [[AP:%.*]], align 8
+; CAS-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CAS:       atomicrmw.start:
+; CAS-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CAS-NEXT:    [[TMP2:%.*]] = and i64 [[LOADED]], [[V:%.*]]
+; CAS-NEXT:    [[NEW:%.*]] = xor i64 [[TMP2]], -1
+; CAS-NEXT:    [[TMP3:%.*]] = cmpxchg i64* [[AP]], i64 [[LOADED]], i64 [[NEW]] seq_cst seq_cst
+; CAS-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
+; CAS-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
+; CAS-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CAS:       atomicrmw.end:
+; CAS-NEXT:    ret i64 [[NEWLOADED]]
+;
+  %tmp0 = atomicrmw nand i64* %ap, i64 %v seq_cst
+  ret i64 %tmp0
+}
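
For reviewers, a minimal standalone C++ sketch of the decision table the patched AArch64TargetLowering::shouldExpandAtomicRMWInIR implements. The enum and helper below are hypothetical stand-ins for the LLVM types, so this models the diff rather than calling the real API:

#include <cstdio>

// Hypothetical stand-in for TargetLowering::AtomicExpansionKind.
enum class AtomicExpansionKind { None, LLSC, CmpXChg };

// Models the patched hook: SizeInBits and IsNand come from the atomicrmw,
// HasLSE from the subtarget.
static AtomicExpansionKind shouldExpandRMW(unsigned SizeInBits, bool HasLSE,
                                           bool IsNand) {
  if (SizeInBits > 128)
    return AtomicExpansionKind::None; // too wide to expand inline here
  if (!HasLSE)
    return AtomicExpansionKind::LLSC; // no CAS available: LDXR/STXR loop
  // LSE has CASP for 128-bit compare-and-swap but no 128-bit (and no NAND)
  // atomicrmw instructions, so those become a CAS loop; everything else
  // maps onto a single LSE instruction and needs no IR-level expansion.
  if (SizeInBits == 128 || IsNand)
    return AtomicExpansionKind::CmpXChg;
  return AtomicExpansionKind::None;
}

int main() {
  // i32 nand with LSE -> CmpXChg (2); i64 add without LSE -> LLSC (1);
  // i32 add with LSE -> None (0).
  std::printf("%d %d %d\n", static_cast<int>(shouldExpandRMW(32, true, true)),
              static_cast<int>(shouldExpandRMW(64, false, false)),
              static_cast<int>(shouldExpandRMW(32, true, false)));
}

The load hook in the diff follows the same shape: only 128-bit loads are expanded at all, to CmpXChg when LSE is available and to LLSC otherwise.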