diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -209,10 +209,10 @@ return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); } -static RValue EmitBinaryAtomic(CodeGenFunction &CGF, - llvm::AtomicRMWInst::BinOp Kind, - const CallExpr *E) { - return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); +static RValue EmitBinaryAtomic( + CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, + AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { + return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E, Ordering)); } /// Utility to insert an atomic instruction based Intrinsic::ID and @@ -244,7 +244,7 @@ Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); llvm::Value *Result = CGF.Builder.CreateAtomicRMW( - Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); + Kind, Args[0], Args[1], llvm::AtomicOrdering::SyncSequentiallyConsistent); Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); if (Invert) Result = @@ -268,8 +268,10 @@ /// /// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics /// invoke the function EmitAtomicCmpXchgForMSIntrin. -static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, - bool ReturnBool) { +static Value * +MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool, + llvm::AtomicOrdering Ordering = + llvm::AtomicOrdering::SequentiallyConsistent) { QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); @@ -285,9 +287,8 @@ Args[1] = EmitToInt(CGF, Args[1], T, IntType); Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); - Value *Pair = CGF.Builder.CreateAtomicCmpXchg( - Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, - llvm::AtomicOrdering::SequentiallyConsistent); + Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], + Ordering, Ordering); if (ReturnBool) // Extract boolean success flag and zext it to int. 
return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), @@ -3811,37 +3812,43 @@ case Builtin::BI__sync_fetch_and_add_4: case Builtin::BI__sync_fetch_and_add_8: case Builtin::BI__sync_fetch_and_add_16: - return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); + return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E, + llvm::AtomicOrdering::SyncSequentiallyConsistent); case Builtin::BI__sync_fetch_and_sub_1: case Builtin::BI__sync_fetch_and_sub_2: case Builtin::BI__sync_fetch_and_sub_4: case Builtin::BI__sync_fetch_and_sub_8: case Builtin::BI__sync_fetch_and_sub_16: - return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); + return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E, + llvm::AtomicOrdering::SyncSequentiallyConsistent); case Builtin::BI__sync_fetch_and_or_1: case Builtin::BI__sync_fetch_and_or_2: case Builtin::BI__sync_fetch_and_or_4: case Builtin::BI__sync_fetch_and_or_8: case Builtin::BI__sync_fetch_and_or_16: - return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); + return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E, + llvm::AtomicOrdering::SyncSequentiallyConsistent); case Builtin::BI__sync_fetch_and_and_1: case Builtin::BI__sync_fetch_and_and_2: case Builtin::BI__sync_fetch_and_and_4: case Builtin::BI__sync_fetch_and_and_8: case Builtin::BI__sync_fetch_and_and_16: - return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); + return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E, + llvm::AtomicOrdering::SyncSequentiallyConsistent); case Builtin::BI__sync_fetch_and_xor_1: case Builtin::BI__sync_fetch_and_xor_2: case Builtin::BI__sync_fetch_and_xor_4: case Builtin::BI__sync_fetch_and_xor_8: case Builtin::BI__sync_fetch_and_xor_16: - return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); + return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E, + llvm::AtomicOrdering::SyncSequentiallyConsistent); case Builtin::BI__sync_fetch_and_nand_1: case Builtin::BI__sync_fetch_and_nand_2: case Builtin::BI__sync_fetch_and_nand_4: case Builtin::BI__sync_fetch_and_nand_8: case Builtin::BI__sync_fetch_and_nand_16: - return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); + return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E, + llvm::AtomicOrdering::SyncSequentiallyConsistent); // Clang extensions: not overloaded yet. 
case Builtin::BI__sync_fetch_and_min: @@ -3901,14 +3908,16 @@ case Builtin::BI__sync_val_compare_and_swap_4: case Builtin::BI__sync_val_compare_and_swap_8: case Builtin::BI__sync_val_compare_and_swap_16: - return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); + return RValue::get(MakeAtomicCmpXchgValue( + *this, E, false, llvm::AtomicOrdering::SyncSequentiallyConsistent)); case Builtin::BI__sync_bool_compare_and_swap_1: case Builtin::BI__sync_bool_compare_and_swap_2: case Builtin::BI__sync_bool_compare_and_swap_4: case Builtin::BI__sync_bool_compare_and_swap_8: case Builtin::BI__sync_bool_compare_and_swap_16: - return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); + return RValue::get(MakeAtomicCmpXchgValue( + *this, E, true, llvm::AtomicOrdering::SyncSequentiallyConsistent)); case Builtin::BI__sync_swap_1: case Builtin::BI__sync_swap_2: @@ -3922,7 +3931,8 @@ case Builtin::BI__sync_lock_test_and_set_4: case Builtin::BI__sync_lock_test_and_set_8: case Builtin::BI__sync_lock_test_and_set_16: - return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); + return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E, + llvm::AtomicOrdering::SyncAcquire); case Builtin::BI__sync_lock_release_1: case Builtin::BI__sync_lock_release_2: @@ -3938,7 +3948,7 @@ llvm::StoreInst *Store = Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, StoreSize); - Store->setAtomic(llvm::AtomicOrdering::Release); + Store->setAtomic(llvm::AtomicOrdering::SyncRelease); return RValue::get(nullptr); } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5911,13 +5911,16 @@ // operation is also an acquire flush. switch (AO) { case llvm::AtomicOrdering::Acquire: + case llvm::AtomicOrdering::SyncAcquire: case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: + case llvm::AtomicOrdering::SyncSequentiallyConsistent: CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, llvm::AtomicOrdering::Acquire); break; case llvm::AtomicOrdering::Monotonic: case llvm::AtomicOrdering::Release: + case llvm::AtomicOrdering::SyncRelease: break; case llvm::AtomicOrdering::NotAtomic: case llvm::AtomicOrdering::Unordered: @@ -5940,12 +5943,15 @@ // the atomic operation is also a release flush. switch (AO) { case llvm::AtomicOrdering::Release: + case llvm::AtomicOrdering::SyncRelease: case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: + case llvm::AtomicOrdering::SyncSequentiallyConsistent: CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, llvm::AtomicOrdering::Release); break; case llvm::AtomicOrdering::Acquire: + case llvm::AtomicOrdering::SyncAcquire: case llvm::AtomicOrdering::Monotonic: break; case llvm::AtomicOrdering::NotAtomic: @@ -6131,12 +6137,15 @@ // the atomic operation is also a release flush. switch (AO) { case llvm::AtomicOrdering::Release: + case llvm::AtomicOrdering::SyncRelease: case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: + case llvm::AtomicOrdering::SyncSequentiallyConsistent: CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, llvm::AtomicOrdering::Release); break; case llvm::AtomicOrdering::Acquire: + case llvm::AtomicOrdering::SyncAcquire: case llvm::AtomicOrdering::Monotonic: break; case llvm::AtomicOrdering::NotAtomic: @@ -6248,15 +6257,18 @@ // operation is also an acquire flush. 
switch (AO) { case llvm::AtomicOrdering::Release: + case llvm::AtomicOrdering::SyncRelease: CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, llvm::AtomicOrdering::Release); break; case llvm::AtomicOrdering::Acquire: + case llvm::AtomicOrdering::SyncAcquire: CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, llvm::AtomicOrdering::Acquire); break; case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: + case llvm::AtomicOrdering::SyncSequentiallyConsistent: CGF.CGM.getOpenMPRuntime().emitFlush( CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease); break; diff --git a/clang/test/CodeGen/2010-01-13-MemBarrier.c b/clang/test/CodeGen/2010-01-13-MemBarrier.c --- a/clang/test/CodeGen/2010-01-13-MemBarrier.c +++ b/clang/test/CodeGen/2010-01-13-MemBarrier.c @@ -5,6 +5,6 @@ unsigned t(uint32_t *ptr, uint32_t val) { // CHECK: @t - // CHECK: atomicrmw xchg i32* {{.*}} seq_cst, align 4 + // CHECK: atomicrmw xchg i32* {{.*}} sync_acq, align 4 return __sync_lock_test_and_set(ptr, val); } diff --git a/clang/test/CodeGen/Atomics.c b/clang/test/CodeGen/Atomics.c --- a/clang/test/CodeGen/Atomics.c +++ b/clang/test/CodeGen/Atomics.c @@ -15,61 +15,61 @@ void test_op_ignore (void) // CHECK-LABEL: define{{.*}} void @test_op_ignore { - (void) __sync_fetch_and_add (&sc, 1); // CHECK: atomicrmw add i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_add (&uc, 1); // CHECK: atomicrmw add i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_add (&ss, 1); // CHECK: atomicrmw add i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_add (&us, 1); // CHECK: atomicrmw add i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_add (&si, 1); // CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_add (&ui, 1); // CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_add (&sll, 1); // CHECK: atomicrmw add i64* {{.*}} seq_cst, align 8 - (void) __sync_fetch_and_add (&ull, 1); // CHECK: atomicrmw add i64* {{.*}} seq_cst, align 8 - - (void) __sync_fetch_and_sub (&sc, 1); // CHECK: atomicrmw sub i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_sub (&uc, 1); // CHECK: atomicrmw sub i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_sub (&ss, 1); // CHECK: atomicrmw sub i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_sub (&us, 1); // CHECK: atomicrmw sub i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_sub (&si, 1); // CHECK: atomicrmw sub i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_sub (&ui, 1); // CHECK: atomicrmw sub i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_sub (&sll, 1); // CHECK: atomicrmw sub i64* {{.*}} seq_cst, align 8 - (void) __sync_fetch_and_sub (&ull, 1); // CHECK: atomicrmw sub i64* {{.*}} seq_cst, align 8 - - (void) __sync_fetch_and_or (&sc, 1); // CHECK: atomicrmw or i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_or (&uc, 1); // CHECK: atomicrmw or i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_or (&ss, 1); // CHECK: atomicrmw or i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_or (&us, 1); // CHECK: atomicrmw or i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_or (&si, 1); // CHECK: atomicrmw or i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_or (&ui, 1); // CHECK: atomicrmw or i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_or (&sll, 1); // CHECK: atomicrmw or i64* {{.*}} seq_cst, align 8 - (void) __sync_fetch_and_or (&ull, 1); // CHECK: atomicrmw or i64* {{.*}} seq_cst, align 8 - - (void) __sync_fetch_and_xor (&sc, 1); 
// CHECK: atomicrmw xor i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_xor (&uc, 1); // CHECK: atomicrmw xor i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_xor (&ss, 1); // CHECK: atomicrmw xor i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_xor (&us, 1); // CHECK: atomicrmw xor i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_xor (&si, 1); // CHECK: atomicrmw xor i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_xor (&ui, 1); // CHECK: atomicrmw xor i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_xor (&sll, 1); // CHECK: atomicrmw xor i64* {{.*}} seq_cst, align 8 - (void) __sync_fetch_and_xor (&ull, 1); // CHECK: atomicrmw xor i64* {{.*}} seq_cst, align 8 - (void) __sync_fetch_and_xor (&u128, 1); // CHECK: atomicrmw xor i128* {{.*}} seq_cst, align 16 - (void) __sync_fetch_and_xor (&s128, 1); // CHECK: atomicrmw xor i128* {{.*}} seq_cst, align 16 - - (void) __sync_fetch_and_nand (&sc, 1); // CHECK: atomicrmw nand i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_nand (&uc, 1); // CHECK: atomicrmw nand i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_nand (&ss, 1); // CHECK: atomicrmw nand i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_nand (&us, 1); // CHECK: atomicrmw nand i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_nand (&si, 1); // CHECK: atomicrmw nand i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_nand (&ui, 1); // CHECK: atomicrmw nand i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_nand (&sll, 1); // CHECK: atomicrmw nand i64* {{.*}} seq_cst, align 8 - (void) __sync_fetch_and_nand (&ull, 1); // CHECK: atomicrmw nand i64* {{.*}} seq_cst, align 8 - - (void) __sync_fetch_and_and (&sc, 1); // CHECK: atomicrmw and i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_and (&uc, 1); // CHECK: atomicrmw and i8* {{.*}} seq_cst, align 1 - (void) __sync_fetch_and_and (&ss, 1); // CHECK: atomicrmw and i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_and (&us, 1); // CHECK: atomicrmw and i16* {{.*}} seq_cst, align 2 - (void) __sync_fetch_and_and (&si, 1); // CHECK: atomicrmw and i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_and (&ui, 1); // CHECK: atomicrmw and i32* {{.*}} seq_cst, align 4 - (void) __sync_fetch_and_and (&sll, 1); // CHECK: atomicrmw and i64* {{.*}} seq_cst, align 8 - (void) __sync_fetch_and_and (&ull, 1); // CHECK: atomicrmw and i64* {{.*}} seq_cst, align 8 + (void) __sync_fetch_and_add (&sc, 1); // CHECK: atomicrmw add i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_add (&uc, 1); // CHECK: atomicrmw add i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_add (&ss, 1); // CHECK: atomicrmw add i16* {{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_add (&us, 1); // CHECK: atomicrmw add i16* {{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_add (&si, 1); // CHECK: atomicrmw add i32* {{.*}} sync_seq_cst, align 4 + (void) __sync_fetch_and_add (&ui, 1); // CHECK: atomicrmw add i32* {{.*}} sync_seq_cst, align 4 + (void) __sync_fetch_and_add (&sll, 1); // CHECK: atomicrmw add i64* {{.*}} sync_seq_cst, align 8 + (void) __sync_fetch_and_add (&ull, 1); // CHECK: atomicrmw add i64* {{.*}} sync_seq_cst, align 8 + + (void) __sync_fetch_and_sub (&sc, 1); // CHECK: atomicrmw sub i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_sub (&uc, 1); // CHECK: atomicrmw sub i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_sub (&ss, 1); // CHECK: atomicrmw sub i16* {{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_sub (&us, 1); // CHECK: atomicrmw sub i16* 
{{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_sub (&si, 1); // CHECK: atomicrmw sub i32* {{.*}} sync_seq_cst, align 4 + (void) __sync_fetch_and_sub (&ui, 1); // CHECK: atomicrmw sub i32* {{.*}} sync_seq_cst, align 4 + (void) __sync_fetch_and_sub (&sll, 1); // CHECK: atomicrmw sub i64* {{.*}} sync_seq_cst, align 8 + (void) __sync_fetch_and_sub (&ull, 1); // CHECK: atomicrmw sub i64* {{.*}} sync_seq_cst, align 8 + + (void) __sync_fetch_and_or (&sc, 1); // CHECK: atomicrmw or i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_or (&uc, 1); // CHECK: atomicrmw or i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_or (&ss, 1); // CHECK: atomicrmw or i16* {{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_or (&us, 1); // CHECK: atomicrmw or i16* {{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_or (&si, 1); // CHECK: atomicrmw or i32* {{.*}} sync_seq_cst, align 4 + (void) __sync_fetch_and_or (&ui, 1); // CHECK: atomicrmw or i32* {{.*}} sync_seq_cst, align 4 + (void) __sync_fetch_and_or (&sll, 1); // CHECK: atomicrmw or i64* {{.*}} sync_seq_cst, align 8 + (void) __sync_fetch_and_or (&ull, 1); // CHECK: atomicrmw or i64* {{.*}} sync_seq_cst, align 8 + + (void) __sync_fetch_and_xor (&sc, 1); // CHECK: atomicrmw xor i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_xor (&uc, 1); // CHECK: atomicrmw xor i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_xor (&ss, 1); // CHECK: atomicrmw xor i16* {{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_xor (&us, 1); // CHECK: atomicrmw xor i16* {{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_xor (&si, 1); // CHECK: atomicrmw xor i32* {{.*}} sync_seq_cst, align 4 + (void) __sync_fetch_and_xor (&ui, 1); // CHECK: atomicrmw xor i32* {{.*}} sync_seq_cst, align 4 + (void) __sync_fetch_and_xor (&sll, 1); // CHECK: atomicrmw xor i64* {{.*}} sync_seq_cst, align 8 + (void) __sync_fetch_and_xor (&ull, 1); // CHECK: atomicrmw xor i64* {{.*}} sync_seq_cst, align 8 + (void) __sync_fetch_and_xor (&u128, 1); // CHECK: atomicrmw xor i128* {{.*}} sync_seq_cst, align 16 + (void) __sync_fetch_and_xor (&s128, 1); // CHECK: atomicrmw xor i128* {{.*}} sync_seq_cst, align 16 + + (void) __sync_fetch_and_nand (&sc, 1); // CHECK: atomicrmw nand i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_nand (&uc, 1); // CHECK: atomicrmw nand i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_nand (&ss, 1); // CHECK: atomicrmw nand i16* {{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_nand (&us, 1); // CHECK: atomicrmw nand i16* {{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_nand (&si, 1); // CHECK: atomicrmw nand i32* {{.*}} sync_seq_cst, align 4 + (void) __sync_fetch_and_nand (&ui, 1); // CHECK: atomicrmw nand i32* {{.*}} sync_seq_cst, align 4 + (void) __sync_fetch_and_nand (&sll, 1); // CHECK: atomicrmw nand i64* {{.*}} sync_seq_cst, align 8 + (void) __sync_fetch_and_nand (&ull, 1); // CHECK: atomicrmw nand i64* {{.*}} sync_seq_cst, align 8 + + (void) __sync_fetch_and_and (&sc, 1); // CHECK: atomicrmw and i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_and (&uc, 1); // CHECK: atomicrmw and i8* {{.*}} sync_seq_cst, align 1 + (void) __sync_fetch_and_and (&ss, 1); // CHECK: atomicrmw and i16* {{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_and (&us, 1); // CHECK: atomicrmw and i16* {{.*}} sync_seq_cst, align 2 + (void) __sync_fetch_and_and (&si, 1); // CHECK: atomicrmw and i32* {{.*}} sync_seq_cst, align 4 + (void) __sync_fetch_and_and (&ui, 1); // CHECK: atomicrmw and i32* {{.*}} 
sync_seq_cst, align 4 + (void) __sync_fetch_and_and (&sll, 1); // CHECK: atomicrmw and i64* {{.*}} sync_seq_cst, align 8 + (void) __sync_fetch_and_and (&ull, 1); // CHECK: atomicrmw and i64* {{.*}} sync_seq_cst, align 8 } @@ -224,92 +224,92 @@ void test_compare_and_swap (void) { sc = __sync_val_compare_and_swap (&sc, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} seq_cst, align 1 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} sync_seq_cst, align 1 // CHECK: extractvalue { i8, i1 } [[PAIR]], 0 uc = __sync_val_compare_and_swap (&uc, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} seq_cst, align 1 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} sync_seq_cst, align 1 // CHECK: extractvalue { i8, i1 } [[PAIR]], 0 ss = __sync_val_compare_and_swap (&ss, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} seq_cst, align 2 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} sync_seq_cst, align 2 // CHECK: extractvalue { i16, i1 } [[PAIR]], 0 us = __sync_val_compare_and_swap (&us, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} seq_cst, align 2 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} sync_seq_cst, align 2 // CHECK: extractvalue { i16, i1 } [[PAIR]], 0 si = __sync_val_compare_and_swap (&si, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} seq_cst, align 4 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} sync_seq_cst, align 4 // CHECK: extractvalue { i32, i1 } [[PAIR]], 0 ui = __sync_val_compare_and_swap (&ui, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} seq_cst, align 4 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} sync_seq_cst, align 4 // CHECK: extractvalue { i32, i1 } [[PAIR]], 0 sll = __sync_val_compare_and_swap (&sll, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} seq_cst, align 8 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} sync_seq_cst, align 8 // CHECK: extractvalue { i64, i1 } [[PAIR]], 0 ull = __sync_val_compare_and_swap (&ull, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} seq_cst, align 8 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} sync_seq_cst, align 8 // CHECK: extractvalue { i64, i1 } [[PAIR]], 0 ui = __sync_bool_compare_and_swap (&sc, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} seq_cst, align 1 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} sync_seq_cst, align 1 // CHECK: extractvalue { i8, i1 } [[PAIR]], 1 ui = __sync_bool_compare_and_swap (&uc, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} seq_cst, align 1 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} sync_seq_cst, align 1 // CHECK: extractvalue { i8, i1 } [[PAIR]], 1 ui = __sync_bool_compare_and_swap (&ss, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} seq_cst, align 2 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} sync_seq_cst, align 2 // CHECK: extractvalue { i16, i1 } [[PAIR]], 1 ui = __sync_bool_compare_and_swap (&us, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} seq_cst, align 2 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} sync_seq_cst, align 2 // CHECK: extractvalue { i16, i1 } [[PAIR]], 1 ui = __sync_bool_compare_and_swap (&si, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} seq_cst, align 4 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} sync_seq_cst, align 4 // CHECK: extractvalue { i32, i1 } [[PAIR]], 1 ui = __sync_bool_compare_and_swap (&ui, uc, sc); - // CHECK: 
[[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} seq_cst, align 4 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} sync_seq_cst, align 4 // CHECK: extractvalue { i32, i1 } [[PAIR]], 1 ui = __sync_bool_compare_and_swap (&sll, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} seq_cst, align 8 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} sync_seq_cst, align 8 // CHECK: extractvalue { i64, i1 } [[PAIR]], 1 ui = __sync_bool_compare_and_swap (&ull, uc, sc); - // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} seq_cst, align 8 + // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} sync_seq_cst, align 8 // CHECK: extractvalue { i64, i1 } [[PAIR]], 1 } void test_lock (void) { - sc = __sync_lock_test_and_set (&sc, 1); // CHECK: atomicrmw xchg i8* {{.*}} seq_cst, align 1 - uc = __sync_lock_test_and_set (&uc, 1); // CHECK: atomicrmw xchg i8* {{.*}} seq_cst, align 1 - ss = __sync_lock_test_and_set (&ss, 1); // CHECK: atomicrmw xchg i16* {{.*}} seq_cst, align 2 - us = __sync_lock_test_and_set (&us, 1); // CHECK: atomicrmw xchg i16* {{.*}} seq_cst, align 2 - si = __sync_lock_test_and_set (&si, 1); // CHECK: atomicrmw xchg i32* {{.*}} seq_cst, align 4 - ui = __sync_lock_test_and_set (&ui, 1); // CHECK: atomicrmw xchg i32* {{.*}} seq_cst, align 4 - sll = __sync_lock_test_and_set (&sll, 1); // CHECK: atomicrmw xchg i64* {{.*}} seq_cst, align 8 - ull = __sync_lock_test_and_set (&ull, 1); // CHECK: atomicrmw xchg i64* {{.*}} seq_cst, align 8 + sc = __sync_lock_test_and_set (&sc, 1); // CHECK: atomicrmw xchg i8* {{.*}} sync_acq, align 1 + uc = __sync_lock_test_and_set (&uc, 1); // CHECK: atomicrmw xchg i8* {{.*}} sync_acq, align 1 + ss = __sync_lock_test_and_set (&ss, 1); // CHECK: atomicrmw xchg i16* {{.*}} sync_acq, align 2 + us = __sync_lock_test_and_set (&us, 1); // CHECK: atomicrmw xchg i16* {{.*}} sync_acq, align 2 + si = __sync_lock_test_and_set (&si, 1); // CHECK: atomicrmw xchg i32* {{.*}} sync_acq, align 4 + ui = __sync_lock_test_and_set (&ui, 1); // CHECK: atomicrmw xchg i32* {{.*}} sync_acq, align 4 + sll = __sync_lock_test_and_set (&sll, 1); // CHECK: atomicrmw xchg i64* {{.*}} sync_acq, align 8 + ull = __sync_lock_test_and_set (&ull, 1); // CHECK: atomicrmw xchg i64* {{.*}} sync_acq, align 8 __sync_synchronize (); // CHECK: fence seq_cst - __sync_lock_release (&sc); // CHECK: store atomic {{.*}} release, align 1 - __sync_lock_release (&uc); // CHECK: store atomic {{.*}} release, align 1 - __sync_lock_release (&ss); // CHECK: store atomic {{.*}} release, align 2 - __sync_lock_release (&us); /// CHECK: store atomic {{.*}} release, align 2 - __sync_lock_release (&si); // CHECK: store atomic {{.*}} release, align 4 - __sync_lock_release (&ui); // CHECK: store atomic {{.*}} release, align 4 - __sync_lock_release (&sll); // CHECK: store atomic {{.*}} release, align 8 - __sync_lock_release (&ull); // CHECK: store atomic {{.*}} release, align 8 + __sync_lock_release (&sc); // CHECK: store atomic {{.*}} sync_rel, align 1 + __sync_lock_release (&uc); // CHECK: store atomic {{.*}} sync_rel, align 1 + __sync_lock_release (&ss); // CHECK: store atomic {{.*}} sync_rel, align 2 + __sync_lock_release (&us); /// CHECK: store atomic {{.*}} sync_rel, align 2 + __sync_lock_release (&si); // CHECK: store atomic {{.*}} sync_rel, align 4 + __sync_lock_release (&ui); // CHECK: store atomic {{.*}} sync_rel, align 4 + __sync_lock_release (&sll); // CHECK: store atomic {{.*}} sync_rel, align 8 + __sync_lock_release (&ull); // CHECK: store atomic {{.*}} sync_rel, align 8 } void 
test_atomic(void) { diff --git a/clang/test/CodeGen/X86/x86_64-atomic-128.c b/clang/test/CodeGen/X86/x86_64-atomic-128.c --- a/clang/test/CodeGen/X86/x86_64-atomic-128.c +++ b/clang/test/CodeGen/X86/x86_64-atomic-128.c @@ -6,7 +6,7 @@ __int128 test_sync_call(__int128 *addr, __int128 val) { // CHECK-LABEL: @test_sync_call - // CHECK: atomicrmw add i128* {{.*}} seq_cst, align 16 + // CHECK: atomicrmw add i128* {{.*}} sync_seq_cst, align 16 return __sync_fetch_and_add(addr, val); } diff --git a/clang/test/CodeGen/atomic.c b/clang/test/CodeGen/atomic.c --- a/clang/test/CodeGen/atomic.c +++ b/clang/test/CodeGen/atomic.c @@ -11,10 +11,10 @@ int* ptrval; old = __sync_fetch_and_add(&val, 1); - // CHECK: atomicrmw add i32* %val, i32 1 seq_cst, align 4 + // CHECK: atomicrmw add i32* %val, i32 1 sync_seq_cst, align 4 old = __sync_fetch_and_sub(&valc, 2); - // CHECK: atomicrmw sub i8* %valc, i8 2 seq_cst, align 1 + // CHECK: atomicrmw sub i8* %valc, i8 2 sync_seq_cst, align 1 old = __sync_fetch_and_min(&val, 3); // CHECK: atomicrmw min i32* %val, i32 3 seq_cst, align 4 @@ -29,68 +29,68 @@ // CHECK: atomicrmw umax i32* %uval, i32 6 seq_cst, align 4 old = __sync_lock_test_and_set(&val, 7); - // CHECK: atomicrmw xchg i32* %val, i32 7 seq_cst, align 4 + // CHECK: atomicrmw xchg i32* %val, i32 7 sync_acq, align 4 old = __sync_swap(&val, 8); // CHECK: atomicrmw xchg i32* %val, i32 8 seq_cst, align 4 old = __sync_val_compare_and_swap(&val, 4, 1976); - // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* %val, i32 4, i32 1976 seq_cst seq_cst, align 4 + // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* %val, i32 4, i32 1976 sync_seq_cst sync_seq_cst, align 4 // CHECK: extractvalue { i32, i1 } [[PAIR]], 0 old = __sync_bool_compare_and_swap(&val, 4, 1976); - // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* %val, i32 4, i32 1976 seq_cst seq_cst, align 4 + // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* %val, i32 4, i32 1976 sync_seq_cst sync_seq_cst, align 4 // CHECK: extractvalue { i32, i1 } [[PAIR]], 1 old = __sync_fetch_and_and(&val, 0x9); - // CHECK: atomicrmw and i32* %val, i32 9 seq_cst, align 4 + // CHECK: atomicrmw and i32* %val, i32 9 sync_seq_cst, align 4 old = __sync_fetch_and_or(&val, 0xa); - // CHECK: atomicrmw or i32* %val, i32 10 seq_cst, align 4 + // CHECK: atomicrmw or i32* %val, i32 10 sync_seq_cst, align 4 old = __sync_fetch_and_xor(&val, 0xb); - // CHECK: atomicrmw xor i32* %val, i32 11 seq_cst, align 4 + // CHECK: atomicrmw xor i32* %val, i32 11 sync_seq_cst, align 4 old = __sync_fetch_and_nand(&val, 0xc); - // CHECK: atomicrmw nand i32* %val, i32 12 seq_cst, align 4 + // CHECK: atomicrmw nand i32* %val, i32 12 sync_seq_cst, align 4 old = __sync_add_and_fetch(&val, 1); - // CHECK: atomicrmw add i32* %val, i32 1 seq_cst, align 4 + // CHECK: atomicrmw add i32* %val, i32 1 sync_seq_cst, align 4 old = __sync_sub_and_fetch(&val, 2); - // CHECK: atomicrmw sub i32* %val, i32 2 seq_cst, align 4 + // CHECK: atomicrmw sub i32* %val, i32 2 sync_seq_cst, align 4 old = __sync_and_and_fetch(&valc, 3); - // CHECK: atomicrmw and i8* %valc, i8 3 seq_cst, align 1 + // CHECK: atomicrmw and i8* %valc, i8 3 sync_seq_cst, align 1 old = __sync_or_and_fetch(&valc, 4); - // CHECK: atomicrmw or i8* %valc, i8 4 seq_cst, align 1 + // CHECK: atomicrmw or i8* %valc, i8 4 sync_seq_cst, align 1 old = __sync_xor_and_fetch(&valc, 5); - // CHECK: atomicrmw xor i8* %valc, i8 5 seq_cst, align 1 + // CHECK: atomicrmw xor i8* %valc, i8 5 sync_seq_cst, align 1 old = __sync_nand_and_fetch(&valc, 6); - // CHECK: atomicrmw nand i8* %valc, i8 6 
seq_cst, align 1 + // CHECK: atomicrmw nand i8* %valc, i8 6 sync_seq_cst, align 1 __sync_val_compare_and_swap((void **)0, (void *)0, (void *)0); - // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* null, i32 0, i32 0 seq_cst seq_cst, align 4 + // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* null, i32 0, i32 0 sync_seq_cst sync_seq_cst, align 4 // CHECK: extractvalue { i32, i1 } [[PAIR]], 0 if ( __sync_val_compare_and_swap(&valb, 0, 1)) { - // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i8* %valb, i8 0, i8 1 seq_cst seq_cst, align 1 + // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i8* %valb, i8 0, i8 1 sync_seq_cst sync_seq_cst, align 1 // CHECK: [[VAL:%[a-z0-9_.]+]] = extractvalue { i8, i1 } [[PAIR]], 0 // CHECK: trunc i8 [[VAL]] to i1 old = 42; } __sync_bool_compare_and_swap((void **)0, (void *)0, (void *)0); - // CHECK: cmpxchg i32* null, i32 0, i32 0 seq_cst seq_cst, align 4 + // CHECK: cmpxchg i32* null, i32 0, i32 0 sync_seq_cst sync_seq_cst, align 4 __sync_lock_release(&val); - // CHECK: store atomic i32 0, {{.*}} release, align 4 + // CHECK: store atomic i32 0, {{.*}} sync_rel, align 4 __sync_lock_release(&ptrval); - // CHECK: store atomic i32 0, {{.*}} release, align 4 + // CHECK: store atomic i32 0, {{.*}} sync_rel, align 4 __sync_synchronize (); // CHECK: fence seq_cst @@ -102,7 +102,7 @@ void release_return(int *lock) { // Ensure this is actually returning void all the way through. return __sync_lock_release(lock); - // CHECK: store atomic {{.*}} release, align 4 + // CHECK: store atomic {{.*}} sync_rel, align 4 } @@ -110,11 +110,11 @@ // CHECK: @addrspace void addrspace(int __attribute__((address_space(256))) * P) { __sync_bool_compare_and_swap(P, 0, 1); - // CHECK: cmpxchg i32 addrspace(256)*{{.*}}, i32 0, i32 1 seq_cst seq_cst, align 4 + // CHECK: cmpxchg i32 addrspace(256)*{{.*}}, i32 0, i32 1 sync_seq_cst sync_seq_cst, align 4 __sync_val_compare_and_swap(P, 0, 1); - // CHECK: cmpxchg i32 addrspace(256)*{{.*}}, i32 0, i32 1 seq_cst seq_cst, align 4 + // CHECK: cmpxchg i32 addrspace(256)*{{.*}}, i32 0, i32 1 sync_seq_cst sync_seq_cst, align 4 __sync_xor_and_fetch(P, 123); - // CHECK: atomicrmw xor i32 addrspace(256)*{{.*}}, i32 123 seq_cst, align 4 + // CHECK: atomicrmw xor i32 addrspace(256)*{{.*}}, i32 123 sync_seq_cst, align 4 } diff --git a/clang/test/CodeGenCXX/atomic.cpp b/clang/test/CodeGenCXX/atomic.cpp --- a/clang/test/CodeGenCXX/atomic.cpp +++ b/clang/test/CodeGenCXX/atomic.cpp @@ -9,7 +9,7 @@ // CHECK-NOT: ret template inline void Ptr<_Tp>::f() { int* _refcount; - // CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4 + // CHECK: atomicrmw add i32* {{.*}} sync_seq_cst, align 4 __sync_fetch_and_add(_refcount, 1); // CHECK-NEXT: ret void } diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -531,7 +531,7 @@ foreach(pat cas swp ldadd ldclr ldeor ldset) foreach(size 1 2 4 8 16) - foreach(model 1 2 3 4) + foreach(model 1 2 3 4 5) if(pat STREQUAL "cas" OR NOT size STREQUAL "16") set(helper_asm "${OA_HELPERS_DIR}/outline_atomic_${pat}${size}_${model}.S") list(APPEND lse_builtins "${helper_asm}") diff --git a/compiler-rt/lib/builtins/aarch64/lse.S b/compiler-rt/lib/builtins/aarch64/lse.S --- a/compiler-rt/lib/builtins/aarch64/lse.S +++ b/compiler-rt/lib/builtins/aarch64/lse.S @@ -7,7 +7,7 @@ // Out-of-line LSE atomics helpers. Ported from libgcc library. 
// N = {1, 2, 4, 8} // M = {1, 2, 4, 8, 16} -// ORDER = {'relax', 'acq', 'rel', 'acq_rel'} +// ORDER = {'relax', 'acq', 'rel', 'acq_rel', 'sync'} // Routines implemented: // // iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr) @@ -35,8 +35,8 @@ #endif // Generate mnemonics for -// L_cas: SIZE: 1,2,4,8,16 MODEL: 1,2,3,4 -// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8 MODEL: 1,2,3,4 +// L_cas: SIZE: 1,2,4,8,16 MODEL: 1,2,3,4,5 +// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8 MODEL: 1,2,3,4,5 #if SIZE == 1 #define S b @@ -64,24 +64,42 @@ #define L #define M 0x000000 #define N 0x000000 +#define DMB #elif MODEL == 2 #define SUFF _acq #define A a #define L #define M 0x400000 #define N 0x800000 +#define DMB #elif MODEL == 3 #define SUFF _rel #define A #define L l #define M 0x008000 #define N 0x400000 +#define DMB #elif MODEL == 4 #define SUFF _acq_rel #define A a #define L l #define M 0x408000 #define N 0xc00000 +#define DMB +#elif MODEL == 5 +#define SUFF _sync +#ifdef L_swp +#define A a +#define L +#define M 0x400000 +#define N 0x800000 +#else +#define A a +#define L l +#define M 0x408000 +#define N 0xc00000 +#endif +#define DMB dmb ish #else #error #endif // MODEL @@ -96,7 +114,11 @@ #endif #define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF) +#if MODEL == 5 +#define LDXR GLUE3(ld, xr, S) +#else #define LDXR GLUE4(ld, A, xr, S) +#endif #define STXR GLUE4(st, L, xr, S) // Define temporary registers. @@ -136,9 +158,14 @@ STXR w(tmp1), s(1), [x2] cbnz w(tmp1), 0b 1: + DMB ret #else +#if MODEL == 5 +#define LDXP GLUE2(ld, xp) +#else #define LDXP GLUE3(ld, A, xp) +#endif #define STXP GLUE3(st, L, xp) #ifdef HAS_ASM_LSE #define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4] @@ -159,6 +186,7 @@ STXP w(tmp2), x2, x3, [x4] cbnz w(tmp2), 0b 1: + DMB ret #endif END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas)) @@ -180,6 +208,7 @@ LDXR s(0), [x1] STXR w(tmp1), s(tmp0), [x1] cbnz w(tmp1), 0b + DMB ret END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp)) #endif // L_swp @@ -224,6 +253,7 @@ OP s(tmp1), s(0), s(tmp0) STXR w(tmp2), s(tmp1), [x1] cbnz w(tmp2), 0b + DMB ret END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM)) #endif // L_ldadd L_ldclr L_ldeor L_ldset diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -339,24 +339,29 @@ LLVMAtomicOrderingMonotonic = 2, /**< guarantees that if you take all the operations affecting a specific address, a consistent ordering exists */ - LLVMAtomicOrderingAcquire = 4, /**< Acquire provides a barrier of the sort - necessary to acquire a lock to access other - memory with normal loads and stores. */ - LLVMAtomicOrderingRelease = 5, /**< Release is similar to Acquire, but with - a barrier of the sort necessary to release - a lock. */ + LLVMAtomicOrderingAcquire = 4, /**< Acquire provides a barrier of the sort + necessary to acquire a lock to access other + memory with normal loads and stores. */ + LLVMAtomicOrderingRelease = 5, /**< Release is similar to Acquire, but with + a barrier of the sort necessary to release + a lock. */ LLVMAtomicOrderingAcquireRelease = 6, /**< provides both an Acquire and a Release barrier (for fences and operations which both read and write memory). */ - LLVMAtomicOrderingSequentiallyConsistent = 7 /**< provides Acquire semantics - for loads and Release - semantics for stores. - Additionally, it guarantees - that a total ordering exists - between all - SequentiallyConsistent - operations.
*/ + LLVMAtomicOrderingSequentiallyConsistent = 7, /**< provides Acquire semantics + for loads and Release + semantics for stores. + Additionally, it guarantees + that a total ordering exists + between all + SequentiallyConsistent + operations. */ + LLVMAtomicOrderingSyncAcquire = 8, /**< Acquire with additional barrier. */ + LLVMAtomicOrderingSyncRelease = 9, /**< Release with additional barrier. */ + LLVMAtomicOrderingSyncSequentiallyConsistent = + 10 /**< SequentiallyConsistent + with additional barrier. */ } LLVMAtomicOrdering; typedef enum { diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -97,6 +97,9 @@ kw_release, kw_acq_rel, kw_seq_cst, + kw_sync_acq, + kw_sync_rel, + kw_sync_seq_cst, kw_syncscope, kw_nnan, kw_ninf, diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -496,7 +496,10 @@ ORDERING_ACQUIRE = 3, ORDERING_RELEASE = 4, ORDERING_ACQREL = 5, - ORDERING_SEQCST = 6 + ORDERING_SEQCST = 6, + ORDERING_SYNC_ACQ = 7, + ORDERING_SYNC_REL = 8, + ORDERING_SYNC_SEQCST = 9 }; /// Markers and flags for call instruction. diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -49,9 +49,9 @@ mutable unsigned Order = 0; protected: - // The 15 first bits of `Value::SubclassData` are available for subclasses of + // The 16 first bits of `Value::SubclassData` are available for subclasses of // `Instruction` to use. - using OpaqueField = Bitfield::Element; + using OpaqueField = Bitfield::Element; // Template alias so that all Instruction storing alignment use the same // definiton. @@ -68,13 +68,13 @@ template using AtomicOrderingBitfieldElementT = - typename Bitfield::Element; private: // The last bit is used to store whether the instruction has metadata attached // or not. - using HasMetadataField = Bitfield::Element; + using HasMetadataField = Bitfield::Element; protected: ~Instruction(); // Use deleteValue() to delete a generic Instruction. diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -511,10 +511,10 @@ AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID); - template - using AtomicOrderingBitfieldElement = - typename Bitfield::Element; + // template + // using AtomicOrderingBitfieldElement = + // typename Bitfield::Element; protected: // Note: Instruction needs to be a friend here to call cloneImpl. @@ -656,20 +656,24 @@ /// /// If the comparison in a cmpxchg operation fails, there is no atomic store /// so release semantics cannot be provided. So this function drops explicit - /// Release requests from the AtomicOrdering. A SequentiallyConsistent - /// operation would remain SequentiallyConsistent. + /// Release requests from the AtomicOrdering. A SequentiallyConsistent or + /// SyncSequentiallyConsistent operation would remain + /// SequentiallyConsistent.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering) { switch (SuccessOrdering) { default: llvm_unreachable("invalid cmpxchg success ordering"); case AtomicOrdering::Release: + case AtomicOrdering::SyncRelease: case AtomicOrdering::Monotonic: return AtomicOrdering::Monotonic; case AtomicOrdering::AcquireRelease: case AtomicOrdering::Acquire: + case AtomicOrdering::SyncAcquire: return AtomicOrdering::Acquire; case AtomicOrdering::SequentiallyConsistent: + case AtomicOrdering::SyncSequentiallyConsistent: return AtomicOrdering::SequentiallyConsistent; } } @@ -767,10 +771,10 @@ }; private: - template - using AtomicOrderingBitfieldElement = - typename Bitfield::Element; + // template + // using AtomicOrderingBitfieldElement = + // typename Bitfield::Element; template using BinOpBitfieldElement = diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -583,7 +583,8 @@ HANDLE_LIBCALL(A##N##_RELAX, nullptr) \ HANDLE_LIBCALL(A##N##_ACQ, nullptr) \ HANDLE_LIBCALL(A##N##_REL, nullptr) \ - HANDLE_LIBCALL(A##N##_ACQ_REL, nullptr) + HANDLE_LIBCALL(A##N##_ACQ_REL, nullptr) \ + HANDLE_LIBCALL(A##N##_SYNC, nullptr) #define HLCALL5(A) \ HLCALLS(A, 1) HLCALLS(A, 2) HLCALLS(A, 4) HLCALLS(A, 8) HLCALLS(A, 16) HLCALL5(OUTLINE_ATOMIC_CAS) diff --git a/llvm/include/llvm/Support/AtomicOrdering.h b/llvm/include/llvm/Support/AtomicOrdering.h --- a/llvm/include/llvm/Support/AtomicOrdering.h +++ b/llvm/include/llvm/Support/AtomicOrdering.h @@ -62,7 +62,13 @@ Release = 5, AcquireRelease = 6, SequentiallyConsistent = 7, - LAST = SequentiallyConsistent + // Special orderings for legacy __sync builtins specified by Intel Itanium + // Processor-specific Application Binary Interface section 7.4 + // release-->sync_rel, acquire-->sync_acq, seq_cst-->sync_seq_cst + SyncAcquire = 8, + SyncRelease = 9, + SyncSequentiallyConsistent = 10, + LAST = SyncSequentiallyConsistent }; bool operator<(AtomicOrdering, AtomicOrdering) = delete; @@ -74,45 +80,82 @@ // is a valid AtomicOrdering. template inline bool isValidAtomicOrdering(Int I) { return static_cast(AtomicOrdering::NotAtomic) <= I && - I <= static_cast(AtomicOrdering::SequentiallyConsistent); + I <= static_cast(AtomicOrdering::SyncSequentiallyConsistent); } /// String used by LLVM IR to represent atomic ordering. inline const char *toIRString(AtomicOrdering ao) { - static const char *names[8] = {"not_atomic", "unordered", "monotonic", - "consume", "acquire", "release", - "acq_rel", "seq_cst"}; + static const char *names[11] = {"not_atomic", "unordered", "monotonic", + "consume", "acquire", "release", + "acq_rel", "seq_cst", "sync_acq", + "sync_rel", "sync_seq_cst"}; return names[static_cast(ao)]; } /// Returns true if ao is stronger than other as defined by the AtomicOrdering /// lattice, which is based on C++'s definition. 
inline bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other) { - static const bool lookup[8][8] = { - // NA UN RX CO AC RE AR SC - /* NotAtomic */ {false, false, false, false, false, false, false, false}, - /* Unordered */ { true, false, false, false, false, false, false, false}, - /* relaxed */ { true, true, false, false, false, false, false, false}, - /* consume */ { true, true, true, false, false, false, false, false}, - /* acquire */ { true, true, true, true, false, false, false, false}, - /* release */ { true, true, true, false, false, false, false, false}, - /* acq_rel */ { true, true, true, true, true, true, false, false}, - /* seq_cst */ { true, true, true, true, true, true, true, false}, + static const bool lookup[11][11] = { + // NA UN RX CO AC RE AR SC + // SAC SRE SSC + /* NotAtomic */ {false, false, false, false, false, false, false, + false, false, false, false}, + /* Unordered */ + {true, false, false, false, false, false, false, false, false, false, + false}, + /* relaxed */ + {true, true, false, false, false, false, false, false, false, false, + false}, + /* consume */ + {true, true, true, false, false, false, false, false, false, false, + false}, + /* acquire */ + {true, true, true, true, false, false, false, false, false, false, false}, + /* release */ + {true, true, true, false, false, false, false, false, false, false, + false}, + /* acq_rel */ + {true, true, true, true, true, true, false, false, false, false, false}, + /* seq_cst */ + {true, true, true, true, true, true, true, false, false, false, false}, + /* sync_acq */ + {true, true, true, true, true, false, false, false, false, false, false}, + /* sync_rel */ + {true, true, true, false, false, true, false, false, false, false, false}, + /* sync_seq_cst */ + {true, true, true, true, true, true, true, true, true, true, false}, }; return lookup[static_cast(AO)][static_cast(Other)]; } inline bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other) { - static const bool lookup[8][8] = { - // NA UN RX CO AC RE AR SC - /* NotAtomic */ { true, false, false, false, false, false, false, false}, - /* Unordered */ { true, true, false, false, false, false, false, false}, - /* relaxed */ { true, true, true, false, false, false, false, false}, - /* consume */ { true, true, true, true, false, false, false, false}, - /* acquire */ { true, true, true, true, true, false, false, false}, - /* release */ { true, true, true, false, false, true, false, false}, - /* acq_rel */ { true, true, true, true, true, true, true, false}, - /* seq_cst */ { true, true, true, true, true, true, true, true}, + static const bool lookup[11][11] = { + // NA UN RX CO AC RE AR SC + // SAC SRE SSC + /* NotAtomic */ {true, false, false, false, false, false, false, false, + false, false, false}, + /* Unordered */ + {true, true, false, false, false, false, false, false, false, false, + false}, + /* relaxed */ + {true, true, true, false, false, false, false, false, false, false, + false}, + /* consume */ + {true, true, true, true, false, false, false, false, false, false, false}, + /* acquire */ + {true, true, true, true, true, false, false, false, false, false, false}, + /* release */ + {true, true, true, false, false, true, false, false, false, false, false}, + /* acq_rel */ + {true, true, true, true, true, true, true, false, false, false, false}, + /* seq_cst */ + {true, true, true, true, true, true, true, true, false, false, false}, + /* sync_acq */ + {true, true, true, true, true, false, false, false, true, false, false}, + /* sync_rel */ 
+ {true, true, true, false, false, true, false, false, false, true, false}, + /* sync_seq_cst */ + {true, true, true, true, true, true, true, true, true, true, true}, }; return lookup[static_cast(AO)][static_cast(Other)]; } @@ -144,16 +187,18 @@ } inline AtomicOrderingCABI toCABI(AtomicOrdering AO) { - static const AtomicOrderingCABI lookup[8] = { - /* NotAtomic */ AtomicOrderingCABI::relaxed, - /* Unordered */ AtomicOrderingCABI::relaxed, - /* relaxed */ AtomicOrderingCABI::relaxed, - /* consume */ AtomicOrderingCABI::consume, - /* acquire */ AtomicOrderingCABI::acquire, - /* release */ AtomicOrderingCABI::release, - /* acq_rel */ AtomicOrderingCABI::acq_rel, - /* seq_cst */ AtomicOrderingCABI::seq_cst, - }; + static const AtomicOrderingCABI lookup[11] = { + /* NotAtomic */ AtomicOrderingCABI::relaxed, + /* Unordered */ AtomicOrderingCABI::relaxed, + /* relaxed */ AtomicOrderingCABI::relaxed, + /* consume */ AtomicOrderingCABI::consume, + /* acquire */ AtomicOrderingCABI::acquire, + /* release */ AtomicOrderingCABI::release, + /* acq_rel */ AtomicOrderingCABI::acq_rel, + /* seq_cst */ AtomicOrderingCABI::seq_cst, + /* sync_acq */ AtomicOrderingCABI::acquire, + /* sync_rel */ AtomicOrderingCABI::release, + /* sync_seq_cst */ AtomicOrderingCABI::seq_cst}; return lookup[static_cast(AO)]; } diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -854,6 +854,12 @@ bit IsAtomicOrderingAcquireRelease = ?; // cast(N)->getOrdering() == AtomicOrdering::SequentiallyConsistent bit IsAtomicOrderingSequentiallyConsistent = ?; + // cast(N)->getOrdering() == AtomicOrdering::SyncAcquire + bit IsAtomicOrderingSyncAcquire = ?; + // cast(N)->getOrdering() == AtomicOrdering::SyncRelease + bit IsAtomicOrderingSyncRelease = ?; + // cast(N)->getOrdering() == AtomicOrdering::SyncSequentiallyConsistent + bit IsAtomicOrderingSyncSequentiallyConsistent = ?; // isAcquireOrStronger(cast(N)->getOrdering()) // !isAcquireOrStronger(cast(N)->getOrdering()) @@ -1526,6 +1532,21 @@ let IsAtomic = true; let IsAtomicOrderingSequentiallyConsistent = true; } + def NAME#_sync_acq : PatFrag<(ops node:$ptr, node:$val), + (!cast(NAME) node:$ptr, node:$val)> { + // let IsAtomic = true; + let IsAtomicOrderingSyncAcquire = true; + } + def NAME#_sync_rel : PatFrag<(ops node:$ptr, node:$val), + (!cast(NAME) node:$ptr, node:$val)> { + // let IsAtomic = true; + let IsAtomicOrderingSyncRelease = true; + } + def NAME#_sync_seq_cst : PatFrag<(ops node:$ptr, node:$val), + (!cast(NAME) node:$ptr, node:$val)> { + // let IsAtomic = true; + let IsAtomicOrderingSyncSequentiallyConsistent = true; + } } multiclass ternary_atomic_op_ord { @@ -1554,6 +1575,21 @@ let IsAtomic = true; let IsAtomicOrderingSequentiallyConsistent = true; } + def NAME#_sync_acq : PatFrag<(ops node:$ptr, node:$cmp, node:$val), + (!cast(NAME) node:$ptr, node:$cmp, node:$val)> { + // let IsAtomic = true; + let IsAtomicOrderingSyncAcquire = true; + } + def NAME#_sync_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val), + (!cast(NAME) node:$ptr, node:$cmp, node:$val)> { + // let IsAtomic = true; + let IsAtomicOrderingSyncRelease = true; + } + def NAME#_sync_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val), + (!cast(NAME) node:$ptr, node:$cmp, node:$val)> { + // let IsAtomic = true; + let IsAtomicOrderingSyncSequentiallyConsistent = true; + } } multiclass binary_atomic_op { diff --git 
a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -552,6 +552,9 @@ KEYWORD(release); KEYWORD(acq_rel); KEYWORD(seq_cst); + KEYWORD(sync_acq); + KEYWORD(sync_rel); + KEYWORD(sync_seq_cst); KEYWORD(syncscope); KEYWORD(nnan); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -2286,6 +2286,15 @@ case lltok::kw_seq_cst: Ordering = AtomicOrdering::SequentiallyConsistent; break; + case lltok::kw_sync_acq: + Ordering = AtomicOrdering::SyncAcquire; + break; + case lltok::kw_sync_rel: + Ordering = AtomicOrdering::SyncRelease; + break; + case lltok::kw_sync_seq_cst: + Ordering = AtomicOrdering::SyncSequentiallyConsistent; + break; } Lex.Lex(); return false; diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1255,6 +1255,12 @@ case bitc::ORDERING_ACQUIRE: return AtomicOrdering::Acquire; case bitc::ORDERING_RELEASE: return AtomicOrdering::Release; case bitc::ORDERING_ACQREL: return AtomicOrdering::AcquireRelease; + case bitc::ORDERING_SYNC_ACQ: + return AtomicOrdering::SyncAcquire; + case bitc::ORDERING_SYNC_REL: + return AtomicOrdering::SyncRelease; + case bitc::ORDERING_SYNC_SEQCST: + return AtomicOrdering::SyncSequentiallyConsistent; default: // Map unknown orderings to sequentially-consistent. case bitc::ORDERING_SEQCST: return AtomicOrdering::SequentiallyConsistent; } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -591,6 +591,12 @@ case AtomicOrdering::Release: return bitc::ORDERING_RELEASE; case AtomicOrdering::AcquireRelease: return bitc::ORDERING_ACQREL; case AtomicOrdering::SequentiallyConsistent: return bitc::ORDERING_SEQCST; + case AtomicOrdering::SyncAcquire: + return bitc::ORDERING_SYNC_ACQ; + case AtomicOrdering::SyncRelease: + return bitc::ORDERING_SYNC_REL; + case AtomicOrdering::SyncSequentiallyConsistent: + return bitc::ORDERING_SYNC_SEQCST; } llvm_unreachable("Invalid ordering"); } diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -838,6 +838,13 @@ PerformPartwordOp); } + bool HasAdditionalBarriers = + (MemOpOrder == AtomicOrdering::SyncAcquire || + MemOpOrder == AtomicOrdering::SyncRelease || + MemOpOrder == AtomicOrdering::SyncSequentiallyConsistent); + if (HasAdditionalBarriers) + TLI->emitTrailingFence(Builder, AI, MemOpOrder); + Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); @@ -1010,7 +1017,12 @@ IRBuilder<> Builder(I); Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign, MemOpOrder, PerformOp); - + bool HasAdditionalBarriers = + (MemOpOrder == AtomicOrdering::SyncAcquire || + MemOpOrder == AtomicOrdering::SyncRelease || + MemOpOrder == AtomicOrdering::SyncSequentiallyConsistent); + if (HasAdditionalBarriers) + TLI->emitTrailingFence(Builder, I, MemOpOrder); I->replaceAllUsesWith(Loaded); I->eraseFromParent(); } @@ -1115,6 +1127,7 @@ Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + return Loaded; } @@ 
-1189,6 +1202,11 @@ SuccessOrder != AtomicOrdering::Acquire && !F->hasMinSize(); + bool HasAdditionalBarriers = + (SuccessOrder == AtomicOrdering::SyncSequentiallyConsistent || + SuccessOrder == AtomicOrdering::SyncAcquire || + SuccessOrder == AtomicOrdering::SyncRelease); + // There's no overhead for sinking the release barrier in a weak cmpxchg, so // do it even on minsize. bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak(); @@ -1321,7 +1339,7 @@ // Make sure later instructions don't get reordered with a fence if // necessary. Builder.SetInsertPoint(SuccessBB); - if (ShouldInsertFencesForAtomic) + if (ShouldInsertFencesForAtomic || HasAdditionalBarriers) TLI->emitTrailingFence(Builder, CI, SuccessOrder); Builder.CreateBr(ExitBB); @@ -1344,7 +1362,7 @@ LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB); if (CI->isWeak()) LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB); - if (ShouldInsertFencesForAtomic) + if (ShouldInsertFencesForAtomic || HasAdditionalBarriers) TLI->emitTrailingFence(Builder, CI, FailureOrder); Builder.CreateBr(ExitBB); diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -3192,6 +3192,9 @@ .Case("release", AtomicOrdering::Release) .Case("acq_rel", AtomicOrdering::AcquireRelease) .Case("seq_cst", AtomicOrdering::SequentiallyConsistent) + .Case("sync_acq", AtomicOrdering::SyncAcquire) + .Case("sync_rel", AtomicOrdering::SyncRelease) + .Case("sync_seq_cst", AtomicOrdering::SyncSequentiallyConsistent) .Default(AtomicOrdering::NotAtomic); if (Order != AtomicOrdering::NotAtomic) { diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -535,37 +535,42 @@ case AtomicOrdering::SequentiallyConsistent: ModelN = 3; break; + case AtomicOrdering::SyncAcquire: + case AtomicOrdering::SyncRelease: + case AtomicOrdering::SyncSequentiallyConsistent: + ModelN = 4; + break; default: return UNKNOWN_LIBCALL; } #define LCALLS(A, B) \ - { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL } + { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL, A##B##_SYNC } #define LCALL5(A) \ LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16) switch (Opc) { case ISD::ATOMIC_CMP_SWAP: { - const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)}; + const Libcall LC[5][5] = {LCALL5(OUTLINE_ATOMIC_CAS)}; return LC[ModeN][ModelN]; } case ISD::ATOMIC_SWAP: { - const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)}; + const Libcall LC[5][5] = {LCALL5(OUTLINE_ATOMIC_SWP)}; return LC[ModeN][ModelN]; } case ISD::ATOMIC_LOAD_ADD: { - const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)}; + const Libcall LC[5][5] = {LCALL5(OUTLINE_ATOMIC_LDADD)}; return LC[ModeN][ModelN]; } case ISD::ATOMIC_LOAD_OR: { - const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)}; + const Libcall LC[5][5] = {LCALL5(OUTLINE_ATOMIC_LDSET)}; return LC[ModeN][ModelN]; } case ISD::ATOMIC_LOAD_CLR: { - const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)}; + const Libcall LC[5][5] = {LCALL5(OUTLINE_ATOMIC_LDCLR)}; return LC[ModeN][ModelN]; } case ISD::ATOMIC_LOAD_XOR: { - const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)}; + const Libcall LC[5][5] = {LCALL5(OUTLINE_ATOMIC_LDEOR)}; return LC[ModeN][ModelN]; } default: diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp --- a/llvm/lib/IR/Core.cpp +++ 
b/llvm/lib/IR/Core.cpp @@ -3586,11 +3586,17 @@ case LLVMAtomicOrderingUnordered: return AtomicOrdering::Unordered; case LLVMAtomicOrderingMonotonic: return AtomicOrdering::Monotonic; case LLVMAtomicOrderingAcquire: return AtomicOrdering::Acquire; + case LLVMAtomicOrderingSyncAcquire: + return AtomicOrdering::SyncAcquire; case LLVMAtomicOrderingRelease: return AtomicOrdering::Release; + case LLVMAtomicOrderingSyncRelease: + return AtomicOrdering::SyncRelease; case LLVMAtomicOrderingAcquireRelease: return AtomicOrdering::AcquireRelease; case LLVMAtomicOrderingSequentiallyConsistent: return AtomicOrdering::SequentiallyConsistent; + case LLVMAtomicOrderingSyncSequentiallyConsistent: + return AtomicOrdering::SyncSequentiallyConsistent; } llvm_unreachable("Invalid LLVMAtomicOrdering value!"); @@ -3603,10 +3609,16 @@ case AtomicOrdering::Monotonic: return LLVMAtomicOrderingMonotonic; case AtomicOrdering::Acquire: return LLVMAtomicOrderingAcquire; case AtomicOrdering::Release: return LLVMAtomicOrderingRelease; case AtomicOrdering::AcquireRelease: return LLVMAtomicOrderingAcquireRelease; case AtomicOrdering::SequentiallyConsistent: return LLVMAtomicOrderingSequentiallyConsistent; + case AtomicOrdering::SyncAcquire: + return LLVMAtomicOrderingSyncAcquire; + case AtomicOrdering::SyncRelease: + return LLVMAtomicOrderingSyncRelease; + case AtomicOrdering::SyncSequentiallyConsistent: + return LLVMAtomicOrderingSyncSequentiallyConsistent; } llvm_unreachable("Invalid AtomicOrdering value!"); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -3969,11 +3969,14 @@ void Verifier::visitFenceInst(FenceInst &FI) { const AtomicOrdering Ordering = FI.getOrdering(); Check(Ordering == AtomicOrdering::Acquire || + Ordering == AtomicOrdering::SyncAcquire || Ordering == AtomicOrdering::Release || + Ordering == AtomicOrdering::SyncRelease || Ordering == AtomicOrdering::AcquireRelease || - Ordering == AtomicOrdering::SequentiallyConsistent, - "fence instructions may only have acquire, release, acq_rel, or " - "seq_cst ordering.", + Ordering == AtomicOrdering::SequentiallyConsistent || + Ordering == AtomicOrdering::SyncSequentiallyConsistent, + "fence instructions may only have acquire, release, acq_rel, seq_cst, " + "sync_acq, sync_rel, or sync_seq_cst ordering.", &FI); visitInstruction(FI); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -700,7 +700,8 @@ setLibcallName(A##N##_RELAX, #B #N "_relax"); \ setLibcallName(A##N##_ACQ, #B #N "_acq"); \ setLibcallName(A##N##_REL, #B #N "_rel"); \ - setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel"); + setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel"); \ + setLibcallName(A##N##_SYNC, #B #N "_sync"); #define LCALLNAME4(A, B) \ LCALLNAMES(A, B, 1) \ LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) @@ -20212,7 +20213,11 @@ Type *ValueTy, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - bool IsAcquire = isAcquireOrStronger(Ord); + // assert((Ord == AtomicOrdering::SyncAcquire || + // Ord == AtomicOrdering::SyncRelease) && + // "Sync acquire/release Memory Ordering not expected"); + bool IsAcquire = isAcquireOrStronger(Ord) && + (Ord != llvm::AtomicOrdering::SyncSequentiallyConsistent); // Since i128 isn't legal and intrinsics
don't get type-lowered, the ldrexd // intrinsic must return {i64, i64} and we have to recombine them into a diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -136,12 +136,15 @@ case AtomicOrdering::Monotonic: return RISCV::LR_W; case AtomicOrdering::Acquire: + case AtomicOrdering::SyncAcquire: return RISCV::LR_W_AQ; case AtomicOrdering::Release: + case AtomicOrdering::SyncRelease: return RISCV::LR_W; case AtomicOrdering::AcquireRelease: return RISCV::LR_W_AQ; case AtomicOrdering::SequentiallyConsistent: + case AtomicOrdering::SyncSequentiallyConsistent: return RISCV::LR_W_AQ_RL; } } @@ -153,12 +156,15 @@ case AtomicOrdering::Monotonic: return RISCV::SC_W; case AtomicOrdering::Acquire: + case AtomicOrdering::SyncAcquire: return RISCV::SC_W; case AtomicOrdering::Release: + case AtomicOrdering::SyncRelease: return RISCV::SC_W_RL; case AtomicOrdering::AcquireRelease: return RISCV::SC_W_RL; case AtomicOrdering::SequentiallyConsistent: + case AtomicOrdering::SyncSequentiallyConsistent: return RISCV::SC_W_AQ_RL; } } @@ -170,12 +176,15 @@ case AtomicOrdering::Monotonic: return RISCV::LR_D; case AtomicOrdering::Acquire: + case AtomicOrdering::SyncAcquire: return RISCV::LR_D_AQ; case AtomicOrdering::Release: + case AtomicOrdering::SyncRelease: return RISCV::LR_D; case AtomicOrdering::AcquireRelease: return RISCV::LR_D_AQ; case AtomicOrdering::SequentiallyConsistent: + case AtomicOrdering::SyncSequentiallyConsistent: return RISCV::LR_D_AQ_RL; } } @@ -187,12 +196,15 @@ case AtomicOrdering::Monotonic: return RISCV::SC_D; case AtomicOrdering::Acquire: + case AtomicOrdering::SyncAcquire: return RISCV::SC_D; case AtomicOrdering::Release: + case AtomicOrdering::SyncRelease: return RISCV::SC_D_RL; case AtomicOrdering::AcquireRelease: return RISCV::SC_D_RL; case AtomicOrdering::SequentiallyConsistent: + case AtomicOrdering::SyncSequentiallyConsistent: return RISCV::SC_D_AQ_RL; } } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11937,7 +11937,9 @@ Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const { - if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) + if (isa<LoadInst>(Inst) && + (Ord == AtomicOrdering::SequentiallyConsistent || + Ord == AtomicOrdering::SyncSequentiallyConsistent)) return Builder.CreateFence(Ord); if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) return Builder.CreateFence(AtomicOrdering::Release); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1486,6 +1486,12 @@ def : Pat<(atomic_fence (XLenVT 6), (timm)), (FENCE_TSO)>; // fence seq_cst -> fence rw, rw def : Pat<(atomic_fence (XLenVT 7), (timm)), (FENCE 0b11, 0b11)>; +// fence sync_acq -> fence r, rw +def : Pat<(atomic_fence (XLenVT 8), (timm)), (FENCE 0b10, 0b11)>; +// fence sync_rel -> fence rw, w +def : Pat<(atomic_fence (XLenVT 9), (timm)), (FENCE 0b11, 0b1)>; +// fence sync_seq_cst -> fence rw, rw +def : Pat<(atomic_fence (XLenVT 10), (timm)), (FENCE 0b11, 0b11)>; // Lowering for atomic load and store is defined in RISCVInstrInfoA.td.
// Although these are lowered to fence+load/store instructions defined in the diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -133,6 +133,12 @@ !cast<RVInst>(BaseInst#"_AQ_RL")>; def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"), !cast<RVInst>(BaseInst#"_AQ_RL")>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_sync_acq"), + !cast<RVInst>(BaseInst#"_AQ")>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_sync_rel"), + !cast<RVInst>(BaseInst#"_RL")>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_sync_seq_cst"), + !cast<RVInst>(BaseInst#"_AQ_RL")>; } defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">; @@ -155,6 +161,12 @@ (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>; def : Pat<(atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr), (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>; +def : Pat<(atomic_load_sub_32_sync_acq GPR:$addr, GPR:$incr), + (AMOADD_W_AQ GPR:$addr, (SUB X0, GPR:$incr))>; +def : Pat<(atomic_load_sub_32_sync_rel GPR:$addr, GPR:$incr), + (AMOADD_W_RL GPR:$addr, (SUB X0, GPR:$incr))>; +def : Pat<(atomic_load_sub_32_sync_seq_cst GPR:$addr, GPR:$incr), + (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>; /// Pseudo AMOs @@ -180,6 +192,12 @@ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>; def : Pat<(atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr), (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>; +def : Pat<(atomic_load_nand_32_sync_acq GPR:$addr, GPR:$incr), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 8)>; +def : Pat<(atomic_load_nand_32_sync_rel GPR:$addr, GPR:$incr), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 9)>; +def : Pat<(atomic_load_nand_32_sync_seq_cst GPR:$addr, GPR:$incr), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 10)>; class PseudoMaskedAMO : Pseudo<(outs GPR:$res, GPR:$scratch), @@ -279,6 +297,12 @@ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>; def : Pat<(!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new), (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; + def : Pat<(!cast<PatFrag>(Op#"_sync_acq") GPR:$addr, GPR:$cmp, GPR:$new), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 8)>; + def : Pat<(!cast<PatFrag>(Op#"_sync_rel") GPR:$addr, GPR:$cmp, GPR:$new), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 9)>; + def : Pat<(!cast<PatFrag>(Op#"_sync_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 10)>; } def PseudoCmpXchg32 : PseudoCmpXchg; @@ -333,6 +357,12 @@ (AMOADD_D_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>; def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)), (AMOADD_D_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>; +def : Pat<(i64 (atomic_load_sub_64_sync_acq GPR:$addr, GPR:$incr)), + (AMOADD_D_AQ GPR:$addr, (SUB X0, GPR:$incr))>; +def : Pat<(i64 (atomic_load_sub_64_sync_rel GPR:$addr, GPR:$incr)), + (AMOADD_D_RL GPR:$addr, (SUB X0, GPR:$incr))>; +def : Pat<(i64 (atomic_load_sub_64_sync_seq_cst GPR:$addr, GPR:$incr)), + (AMOADD_D_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>; /// 64-bit pseudo AMOs @@ -350,6 +380,12 @@ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>; def : Pat<(i64 (atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr)), (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>; +def : Pat<(i64 (atomic_load_nand_64_sync_acq GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 8)>; +def : Pat<(i64 (atomic_load_nand_64_sync_rel GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 9)>; +def : Pat<(i64 (atomic_load_nand_64_sync_seq_cst GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 10)>; def : PseudoMaskedAMOPat; diff --git
a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -2160,11 +2160,16 @@ case AtomicOrdering::Monotonic: case AtomicOrdering::Acquire: return AtomicOrdering::Acquire; + case AtomicOrdering::SyncAcquire: + return AtomicOrdering::SyncAcquire; case AtomicOrdering::Release: case AtomicOrdering::AcquireRelease: return AtomicOrdering::AcquireRelease; case AtomicOrdering::SequentiallyConsistent: return AtomicOrdering::SequentiallyConsistent; + case AtomicOrdering::SyncRelease: + case AtomicOrdering::SyncSequentiallyConsistent: + return AtomicOrdering::SyncSequentiallyConsistent; } llvm_unreachable("Unknown ordering"); } @@ -2445,11 +2450,16 @@ case AtomicOrdering::Monotonic: case AtomicOrdering::Release: return AtomicOrdering::Release; + case AtomicOrdering::SyncRelease: + return AtomicOrdering::SyncRelease; case AtomicOrdering::Acquire: case AtomicOrdering::AcquireRelease: return AtomicOrdering::AcquireRelease; case AtomicOrdering::SequentiallyConsistent: return AtomicOrdering::SequentiallyConsistent; + case AtomicOrdering::SyncAcquire: + case AtomicOrdering::SyncSequentiallyConsistent: + return AtomicOrdering::SyncSequentiallyConsistent; } llvm_unreachable("Unknown ordering"); } diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1797,11 +1797,16 @@ case AtomicOrdering::Monotonic: case AtomicOrdering::Release: return AtomicOrdering::Release; + case AtomicOrdering::SyncRelease: + return AtomicOrdering::SyncRelease; case AtomicOrdering::Acquire: case AtomicOrdering::AcquireRelease: return AtomicOrdering::AcquireRelease; case AtomicOrdering::SequentiallyConsistent: return AtomicOrdering::SequentiallyConsistent; + case AtomicOrdering::SyncAcquire: + case AtomicOrdering::SyncSequentiallyConsistent: + return AtomicOrdering::SyncSequentiallyConsistent; } llvm_unreachable("Unknown ordering"); } @@ -1832,11 +1837,16 @@ case AtomicOrdering::Monotonic: case AtomicOrdering::Acquire: return AtomicOrdering::Acquire; + case AtomicOrdering::SyncAcquire: + return AtomicOrdering::SyncAcquire; case AtomicOrdering::Release: case AtomicOrdering::AcquireRelease: return AtomicOrdering::AcquireRelease; case AtomicOrdering::SequentiallyConsistent: return AtomicOrdering::SequentiallyConsistent; + case AtomicOrdering::SyncRelease: + case AtomicOrdering::SyncSequentiallyConsistent: + return AtomicOrdering::SyncSequentiallyConsistent; } llvm_unreachable("Unknown ordering"); } diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -684,6 +684,15 @@ case AtomicOrdering::Release: v = 3; break; case AtomicOrdering::AcquireRelease: v = 4; break; case AtomicOrdering::SequentiallyConsistent: v = 5; break; + case AtomicOrdering::SyncAcquire: + v = 6; + break; + case AtomicOrdering::SyncRelease: + v = 7; + break; + case AtomicOrdering::SyncSequentiallyConsistent: + v = 8; + break; } return IRB->getInt32(v); } diff --git a/llvm/test/CodeGen/AArch64/aarch64-sync-builtins.ll b/llvm/test/CodeGen/AArch64/aarch64-sync-builtins.ll --- 
a/llvm/test/CodeGen/AArch64/aarch64-sync-builtins.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-sync-builtins.ll @@ -101,14 +101,16 @@ define dso_local i32 @test_fetch_and_add_32(ptr nocapture noundef %p, i32 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_add_32: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB0_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w8, [x0] -; CHECK-NEXT: add w9, w8, w1 -; CHECK-NEXT: stlxr w10, w9, [x0] +; CHECK-NEXT: ldxr w0, [x8] +; CHECK-NEXT: add w9, w0, w1 +; CHECK-NEXT: stlxr w10, w9, [x8] ; CHECK-NEXT: cbnz w10, .LBB0_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_fetch_and_add_32: @@ -119,11 +121,11 @@ ; OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: mov w0, w1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw add ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw add ptr %p, i32 %v sync_seq_cst, align 4 ret i32 %0 } @@ -133,11 +135,12 @@ ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB1_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x0, [x8] +; CHECK-NEXT: ldxr x0, [x8] ; CHECK-NEXT: add x9, x0, x1 ; CHECK-NEXT: stlxr w10, x9, [x8] ; CHECK-NEXT: cbnz w10, .LBB1_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_fetch_and_add_64: @@ -148,25 +151,27 @@ ; OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: mov x0, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw add ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw add ptr %p, i64 %v sync_seq_cst, align 8 ret i64 %0 } define dso_local i32 @test_fetch_and_sub_32(ptr nocapture noundef %p, i32 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_sub_32: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB2_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w8, [x0] -; CHECK-NEXT: sub w9, w8, w1 -; CHECK-NEXT: stlxr w10, w9, [x0] +; CHECK-NEXT: ldxr w0, [x8] +; CHECK-NEXT: sub w9, w0, w1 +; CHECK-NEXT: stlxr w10, w9, [x8] ; CHECK-NEXT: cbnz w10, .LBB2_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_fetch_and_sub_32: @@ -177,11 +182,11 @@ ; OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: neg w0, w1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw sub ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw sub ptr %p, i32 %v sync_seq_cst, align 4 ret i32 %0 } @@ -191,11 +196,12 @@ ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB3_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr 
x0, [x8] +; CHECK-NEXT: ldxr x0, [x8] ; CHECK-NEXT: sub x9, x0, x1 ; CHECK-NEXT: stlxr w10, x9, [x8] ; CHECK-NEXT: cbnz w10, .LBB3_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_fetch_and_sub_64: @@ -206,25 +212,27 @@ ; OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: neg x0, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw sub ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw sub ptr %p, i64 %v sync_seq_cst, align 8 ret i64 %0 } define dso_local i32 @test_fetch_and_or_32(ptr nocapture noundef %p, i32 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_or_32: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB4_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w8, [x0] -; CHECK-NEXT: orr w9, w8, w1 -; CHECK-NEXT: stlxr w10, w9, [x0] +; CHECK-NEXT: ldxr w0, [x8] +; CHECK-NEXT: orr w9, w0, w1 +; CHECK-NEXT: stlxr w10, w9, [x8] ; CHECK-NEXT: cbnz w10, .LBB4_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_fetch_and_or_32: @@ -235,11 +243,11 @@ ; OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: mov w0, w1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw or ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw or ptr %p, i32 %v sync_seq_cst, align 4 ret i32 %0 } @@ -249,11 +257,12 @@ ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB5_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x0, [x8] +; CHECK-NEXT: ldxr x0, [x8] ; CHECK-NEXT: orr x9, x0, x1 ; CHECK-NEXT: stlxr w10, x9, [x8] ; CHECK-NEXT: cbnz w10, .LBB5_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_fetch_and_or_64: @@ -264,25 +273,27 @@ ; OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: mov x0, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw or ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw or ptr %p, i64 %v sync_seq_cst, align 8 ret i64 %0 } define dso_local i32 @test_fetch_and_and_32(ptr nocapture noundef %p, i32 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_and_32: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB6_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w8, [x0] -; CHECK-NEXT: and w9, w8, w1 -; CHECK-NEXT: stlxr w10, w9, [x0] +; CHECK-NEXT: ldxr w0, [x8] +; CHECK-NEXT: and w9, w0, w1 +; CHECK-NEXT: stlxr w10, w9, [x8] ; CHECK-NEXT: cbnz w10, .LBB6_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_fetch_and_and_32: @@ -293,11 +304,11 @@ ; 
OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: mvn w0, w1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw and ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw and ptr %p, i32 %v sync_seq_cst, align 4 ret i32 %0 } @@ -307,11 +318,12 @@ ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB7_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x0, [x8] +; CHECK-NEXT: ldxr x0, [x8] ; CHECK-NEXT: and x9, x0, x1 ; CHECK-NEXT: stlxr w10, x9, [x8] ; CHECK-NEXT: cbnz w10, .LBB7_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_fetch_and_and_64: @@ -322,25 +334,27 @@ ; OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: mvn x0, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw and ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw and ptr %p, i64 %v sync_seq_cst, align 8 ret i64 %0 } define dso_local i32 @test_fetch_and_xor_32(ptr nocapture noundef %p, i32 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_xor_32: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB8_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w8, [x0] -; CHECK-NEXT: eor w9, w8, w1 -; CHECK-NEXT: stlxr w10, w9, [x0] +; CHECK-NEXT: ldxr w0, [x8] +; CHECK-NEXT: eor w9, w0, w1 +; CHECK-NEXT: stlxr w10, w9, [x8] ; CHECK-NEXT: cbnz w10, .LBB8_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_fetch_and_xor_32: @@ -351,11 +365,11 @@ ; OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: mov w0, w1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw xor ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw xor ptr %p, i32 %v sync_seq_cst, align 4 ret i32 %0 } @@ -365,11 +379,12 @@ ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB9_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x0, [x8] +; CHECK-NEXT: ldxr x0, [x8] ; CHECK-NEXT: eor x9, x0, x1 ; CHECK-NEXT: stlxr w10, x9, [x8] ; CHECK-NEXT: cbnz w10, .LBB9_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_fetch_and_xor_64: @@ -380,11 +395,11 @@ ; OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: mov x0, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw xor ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw xor ptr %p, i64 %v sync_seq_cst, align 8 ret i64 %0 } @@ -393,12 +408,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB10_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr 
w8, [x0] +; CHECK-NEXT: ldxr w8, [x0] ; CHECK-NEXT: add w8, w8, w1 ; CHECK-NEXT: stlxr w9, w8, [x0] ; CHECK-NEXT: cbnz w9, .LBB10_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_add_and_fetch_32: @@ -410,12 +426,12 @@ ; OUTLINE-ATOMICS-NEXT: mov w19, w1 ; OUTLINE-ATOMICS-NEXT: mov x1, x0 ; OUTLINE-ATOMICS-NEXT: mov w0, w19 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_sync ; OUTLINE-ATOMICS-NEXT: add w0, w0, w19 ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw add ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw add ptr %p, i32 %v sync_seq_cst, align 4 %1 = add i32 %0, %v ret i32 %1 } @@ -425,12 +441,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB11_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x8, [x0] +; CHECK-NEXT: ldxr x8, [x0] ; CHECK-NEXT: add x8, x8, x1 ; CHECK-NEXT: stlxr w9, x8, [x0] ; CHECK-NEXT: cbnz w9, .LBB11_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_add_and_fetch_64: @@ -442,12 +459,12 @@ ; OUTLINE-ATOMICS-NEXT: mov x19, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x0 ; OUTLINE-ATOMICS-NEXT: mov x0, x19 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_sync ; OUTLINE-ATOMICS-NEXT: add x0, x0, x19 ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw add ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw add ptr %p, i64 %v sync_seq_cst, align 8 %1 = add i64 %0, %v ret i64 %1 } @@ -457,12 +474,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB12_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w8, [x0] +; CHECK-NEXT: ldxr w8, [x0] ; CHECK-NEXT: sub w8, w8, w1 ; CHECK-NEXT: stlxr w9, w8, [x0] ; CHECK-NEXT: cbnz w9, .LBB12_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_sub_and_fetch_32: @@ -474,12 +492,12 @@ ; OUTLINE-ATOMICS-NEXT: mov w19, w1 ; OUTLINE-ATOMICS-NEXT: mov x1, x0 ; OUTLINE-ATOMICS-NEXT: neg w0, w19 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_sync ; OUTLINE-ATOMICS-NEXT: sub w0, w0, w19 ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw sub ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw sub ptr %p, i32 %v sync_seq_cst, align 4 %1 = sub i32 %0, %v ret i32 %1 } @@ -489,12 +507,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB13_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x8, [x0] +; CHECK-NEXT: ldxr x8, [x0] ; CHECK-NEXT: sub x8, x8, x1 ; CHECK-NEXT: stlxr w9, x8, [x0] ; CHECK-NEXT: cbnz w9, .LBB13_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_sub_and_fetch_64: @@ -506,12 +525,12 @@ ; OUTLINE-ATOMICS-NEXT: mov x19, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x0 ; OUTLINE-ATOMICS-NEXT: neg x0, x19 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_sync ; OUTLINE-ATOMICS-NEXT: sub x0, x0, x19 ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 
// 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw sub ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw sub ptr %p, i64 %v sync_seq_cst, align 8 %1 = sub i64 %0, %v ret i64 %1 } @@ -521,12 +540,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB14_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w8, [x0] +; CHECK-NEXT: ldxr w8, [x0] ; CHECK-NEXT: orr w8, w8, w1 ; CHECK-NEXT: stlxr w9, w8, [x0] ; CHECK-NEXT: cbnz w9, .LBB14_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_or_and_fetch_32: @@ -538,12 +558,12 @@ ; OUTLINE-ATOMICS-NEXT: mov w19, w1 ; OUTLINE-ATOMICS-NEXT: mov x1, x0 ; OUTLINE-ATOMICS-NEXT: mov w0, w19 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_sync ; OUTLINE-ATOMICS-NEXT: orr w0, w0, w19 ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw or ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw or ptr %p, i32 %v sync_seq_cst, align 4 %1 = or i32 %0, %v ret i32 %1 } @@ -553,12 +573,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB15_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x8, [x0] +; CHECK-NEXT: ldxr x8, [x0] ; CHECK-NEXT: orr x8, x8, x1 ; CHECK-NEXT: stlxr w9, x8, [x0] ; CHECK-NEXT: cbnz w9, .LBB15_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_or_and_fetch_64: @@ -570,12 +591,12 @@ ; OUTLINE-ATOMICS-NEXT: mov x19, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x0 ; OUTLINE-ATOMICS-NEXT: mov x0, x19 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_sync ; OUTLINE-ATOMICS-NEXT: orr x0, x0, x19 ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw or ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw or ptr %p, i64 %v sync_seq_cst, align 8 %1 = or i64 %0, %v ret i64 %1 } @@ -585,12 +606,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB16_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w8, [x0] +; CHECK-NEXT: ldxr w8, [x0] ; CHECK-NEXT: and w8, w8, w1 ; CHECK-NEXT: stlxr w9, w8, [x0] ; CHECK-NEXT: cbnz w9, .LBB16_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_and_and_fetch_32: @@ -602,12 +624,12 @@ ; OUTLINE-ATOMICS-NEXT: mov w19, w1 ; OUTLINE-ATOMICS-NEXT: mov x1, x0 ; OUTLINE-ATOMICS-NEXT: mvn w0, w19 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_sync ; OUTLINE-ATOMICS-NEXT: and w0, w0, w19 ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw and ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw and ptr %p, i32 %v sync_seq_cst, align 4 %1 = and i32 %0, %v ret i32 %1 } @@ -617,12 +639,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB17_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x8, [x0] +; CHECK-NEXT: ldxr x8, [x0] ; CHECK-NEXT: and x8, x8, x1 ; CHECK-NEXT: stlxr w9, x8, [x0] ; CHECK-NEXT: cbnz w9, .LBB17_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; 
OUTLINE-ATOMICS-LABEL: test_and_and_fetch_64: @@ -634,12 +657,12 @@ ; OUTLINE-ATOMICS-NEXT: mov x19, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x0 ; OUTLINE-ATOMICS-NEXT: mvn x0, x19 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_sync ; OUTLINE-ATOMICS-NEXT: and x0, x0, x19 ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw and ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw and ptr %p, i64 %v sync_seq_cst, align 8 %1 = and i64 %0, %v ret i64 %1 } @@ -649,12 +672,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB18_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w8, [x0] +; CHECK-NEXT: ldxr w8, [x0] ; CHECK-NEXT: eor w8, w8, w1 ; CHECK-NEXT: stlxr w9, w8, [x0] ; CHECK-NEXT: cbnz w9, .LBB18_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_xor_and_fetch_32: @@ -666,12 +690,12 @@ ; OUTLINE-ATOMICS-NEXT: mov w19, w1 ; OUTLINE-ATOMICS-NEXT: mov x1, x0 ; OUTLINE-ATOMICS-NEXT: mov w0, w19 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_sync ; OUTLINE-ATOMICS-NEXT: eor w0, w0, w19 ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw xor ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw xor ptr %p, i32 %v sync_seq_cst, align 4 %1 = xor i32 %0, %v ret i32 %1 } @@ -681,12 +705,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB19_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x8, [x0] +; CHECK-NEXT: ldxr x8, [x0] ; CHECK-NEXT: eor x8, x8, x1 ; CHECK-NEXT: stlxr w9, x8, [x0] ; CHECK-NEXT: cbnz w9, .LBB19_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_xor_and_fetch_64: @@ -698,12 +723,12 @@ ; OUTLINE-ATOMICS-NEXT: mov x19, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x0 ; OUTLINE-ATOMICS-NEXT: mov x0, x19 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_sync ; OUTLINE-ATOMICS-NEXT: eor x0, x0, x19 ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw xor ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw xor ptr %p, i64 %v sync_seq_cst, align 8 %1 = xor i64 %0, %v ret i64 %1 } @@ -713,29 +738,31 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB20_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w8, [x0] +; CHECK-NEXT: ldxr w8, [x0] ; CHECK-NEXT: and w8, w8, w1 ; CHECK-NEXT: mvn w8, w8 ; CHECK-NEXT: stlxr w9, w8, [x0] ; CHECK-NEXT: cbnz w9, .LBB20_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_nand_and_fetch_32: ; OUTLINE-ATOMICS: // %bb.0: // %entry ; OUTLINE-ATOMICS-NEXT: .LBB20_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x0] +; OUTLINE-ATOMICS-NEXT: ldxr w8, [x0] ; OUTLINE-ATOMICS-NEXT: and w8, w8, w1 ; OUTLINE-ATOMICS-NEXT: mvn w8, w8 ; OUTLINE-ATOMICS-NEXT: stlxr w9, w8, [x0] ; OUTLINE-ATOMICS-NEXT: cbnz w9, .LBB20_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 +; OUTLINE-ATOMICS-NEXT: dmb ish ; OUTLINE-ATOMICS-NEXT: ret 
entry: - %0 = atomicrmw nand ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw nand ptr %p, i32 %v sync_seq_cst, align 4 %1 = and i32 %0, %v %2 = xor i32 %1, -1 ret i32 %2 @@ -746,29 +773,31 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB21_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x8, [x0] +; CHECK-NEXT: ldxr x8, [x0] ; CHECK-NEXT: and x8, x8, x1 ; CHECK-NEXT: mvn x8, x8 ; CHECK-NEXT: stlxr w9, x8, [x0] ; CHECK-NEXT: cbnz w9, .LBB21_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_nand_and_fetch_64: ; OUTLINE-ATOMICS: // %bb.0: // %entry ; OUTLINE-ATOMICS-NEXT: .LBB21_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x0] +; OUTLINE-ATOMICS-NEXT: ldxr x8, [x0] ; OUTLINE-ATOMICS-NEXT: and x8, x8, x1 ; OUTLINE-ATOMICS-NEXT: mvn x8, x8 ; OUTLINE-ATOMICS-NEXT: stlxr w9, x8, [x0] ; OUTLINE-ATOMICS-NEXT: cbnz w9, .LBB21_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 +; OUTLINE-ATOMICS-NEXT: dmb ish ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw nand ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw nand ptr %p, i64 %v sync_seq_cst, align 8 %1 = and i64 %0, %v %2 = xor i64 %1, -1 ret i64 %2 @@ -779,7 +808,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB22_1: // %cmpxchg.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w8, [x0] +; CHECK-NEXT: ldxr w8, [x0] ; CHECK-NEXT: cmp w8, w1 ; CHECK-NEXT: b.ne .LBB22_4 ; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore @@ -788,10 +817,12 @@ ; CHECK-NEXT: cbnz w8, .LBB22_1 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB22_4: // %cmpxchg.nostore ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: clrex +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_bool_cas_32: @@ -805,13 +836,13 @@ ; OUTLINE-ATOMICS-NEXT: mov w0, w1 ; OUTLINE-ATOMICS-NEXT: mov w1, w2 ; OUTLINE-ATOMICS-NEXT: mov x2, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_sync ; OUTLINE-ATOMICS-NEXT: cmp w0, w19 ; OUTLINE-ATOMICS-NEXT: cset w0, eq ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = cmpxchg ptr %p, i32 %old, i32 %new seq_cst seq_cst, align 4 + %0 = cmpxchg ptr %p, i32 %old, i32 %new sync_seq_cst sync_seq_cst, align 4 %1 = extractvalue { i32, i1 } %0, 1 ret i1 %1 } @@ -821,7 +852,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: .LBB23_1: // %cmpxchg.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x8, [x0] +; CHECK-NEXT: ldxr x8, [x0] ; CHECK-NEXT: cmp x8, x1 ; CHECK-NEXT: b.ne .LBB23_4 ; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore @@ -830,10 +861,12 @@ ; CHECK-NEXT: cbnz w8, .LBB23_1 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB23_4: // %cmpxchg.nostore ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: clrex +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_bool_cas_64: @@ -847,13 +880,13 @@ ; OUTLINE-ATOMICS-NEXT: mov x0, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x2 ; OUTLINE-ATOMICS-NEXT: mov x2, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_sync ; OUTLINE-ATOMICS-NEXT: cmp x0, x19 ; OUTLINE-ATOMICS-NEXT: cset w0, eq ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 
16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = cmpxchg ptr %p, i64 %old, i64 %new seq_cst seq_cst, align 8 + %0 = cmpxchg ptr %p, i64 %old, i64 %new sync_seq_cst sync_seq_cst, align 8 %1 = extractvalue { i64, i1 } %0, 1 ret i1 %1 } @@ -864,7 +897,7 @@ ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB24_1: // %cmpxchg.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr w0, [x8] +; CHECK-NEXT: ldxr w0, [x8] ; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: b.ne .LBB24_4 ; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore @@ -873,10 +906,12 @@ ; CHECK-NEXT: cbnz w9, .LBB24_1 ; CHECK-NEXT: // %bb.3: // %cmpxchg.end ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB24_4: // %cmpxchg.nostore ; CHECK-NEXT: clrex ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_sync_val_cas_32: @@ -888,11 +923,11 @@ ; OUTLINE-ATOMICS-NEXT: mov w0, w1 ; OUTLINE-ATOMICS-NEXT: mov w1, w2 ; OUTLINE-ATOMICS-NEXT: mov x2, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = cmpxchg ptr %p, i32 %old, i32 %new seq_cst seq_cst, align 4 + %0 = cmpxchg ptr %p, i32 %old, i32 %new sync_seq_cst sync_seq_cst, align 4 %1 = extractvalue { i32, i1 } %0, 0 ret i32 %1 } @@ -903,7 +938,7 @@ ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: .LBB25_1: // %cmpxchg.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldaxr x0, [x8] +; CHECK-NEXT: ldxr x0, [x8] ; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: b.ne .LBB25_4 ; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore @@ -911,9 +946,11 @@ ; CHECK-NEXT: stlxr w9, x2, [x8] ; CHECK-NEXT: cbnz w9, .LBB25_1 ; CHECK-NEXT: // %bb.3: // %cmpxchg.end +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB25_4: // %cmpxchg.nostore ; CHECK-NEXT: clrex +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_sync_val_cas_64: @@ -925,11 +962,11 @@ ; OUTLINE-ATOMICS-NEXT: mov x0, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x2 ; OUTLINE-ATOMICS-NEXT: mov x2, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = cmpxchg ptr %p, i64 %old, i64 %new seq_cst seq_cst, align 8 + %0 = cmpxchg ptr %p, i64 %old, i64 %new sync_seq_cst sync_seq_cst, align 8 %1 = extractvalue { i64, i1 } %0, 0 ret i64 %1 } @@ -941,10 +978,11 @@ ; CHECK-NEXT: .LBB26_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldaxr w0, [x8] -; CHECK-NEXT: stlxr w9, w1, [x8] +; CHECK-NEXT: stxr w9, w1, [x8] ; CHECK-NEXT: cbnz w9, .LBB26_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_lock_test_and_set_32: @@ -955,11 +993,11 @@ ; OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: mov w0, w1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw xchg ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw xchg ptr %p, i32 %v sync_acq, align 4 ret i32 %0 } @@ -970,9 +1008,10 @@ ; CHECK-NEXT: .LBB27_1: // %atomicrmw.start ; 
CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldaxr x0, [x8] -; CHECK-NEXT: stlxr w9, x1, [x8] +; CHECK-NEXT: stxr w9, x1, [x8] ; CHECK-NEXT: cbnz w9, .LBB27_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: dmb ish ; CHECK-NEXT: ret ; ; OUTLINE-ATOMICS-LABEL: test_lock_test_and_set_64: @@ -983,11 +1022,11 @@ ; OUTLINE-ATOMICS-NEXT: mov x8, x0 ; OUTLINE-ATOMICS-NEXT: mov x0, x1 ; OUTLINE-ATOMICS-NEXT: mov x1, x8 -; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel +; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_sync ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret entry: - %0 = atomicrmw xchg ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw xchg ptr %p, i64 %v sync_acq, align 8 ret i64 %0 } @@ -1002,7 +1041,7 @@ ; OUTLINE-ATOMICS-NEXT: stlr wzr, [x0] ; OUTLINE-ATOMICS-NEXT: ret entry: - store atomic i32 0, ptr %p release, align 4 + store atomic i32 0, ptr %p sync_rel, align 4 ret void } @@ -1017,7 +1056,7 @@ ; OUTLINE-ATOMICS-NEXT: stlr xzr, [x0] ; OUTLINE-ATOMICS-NEXT: ret entry: - store atomic i64 0, ptr %p release, align 8 + store atomic i64 0, ptr %p sync_rel, align 8 ret void } diff --git a/llvm/test/CodeGen/RISCV/sync-builtins.ll b/llvm/test/CodeGen/RISCV/sync-builtins.ll --- a/llvm/test/CodeGen/RISCV/sync-builtins.ll +++ b/llvm/test/CodeGen/RISCV/sync-builtins.ll @@ -100,10 +100,10 @@ define dso_local signext i32 @test_fetch_and_add_32(ptr nocapture noundef %p, i32 noundef signext %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_add_32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoadd.w.aqrl a0, a1, (a0) +; CHECK-NEXT: amoadd.w.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw add ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw add ptr %p, i32 %v sync_seq_cst, align 4 ret i32 %0 } @@ -111,10 +111,10 @@ define dso_local i64 @test_fetch_and_add_64(ptr nocapture noundef %p, i64 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_add_64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoadd.d.aqrl a0, a1, (a0) +; CHECK-NEXT: amoadd.d.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw add ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw add ptr %p, i64 %v sync_seq_cst, align 8 ret i64 %0 } @@ -123,10 +123,10 @@ ; CHECK-LABEL: test_fetch_and_sub_32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: neg a1, a1 -; CHECK-NEXT: amoadd.w.aqrl a0, a1, (a0) +; CHECK-NEXT: amoadd.w.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw sub ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw sub ptr %p, i32 %v sync_seq_cst, align 4 ret i32 %0 } @@ -135,10 +135,10 @@ ; CHECK-LABEL: test_fetch_and_sub_64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: neg a1, a1 -; CHECK-NEXT: amoadd.d.aqrl a0, a1, (a0) +; CHECK-NEXT: amoadd.d.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw sub ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw sub ptr %p, i64 %v sync_seq_cst, align 8 ret i64 %0 } @@ -146,10 +146,10 @@ define dso_local signext i32 @test_fetch_and_or_32(ptr nocapture noundef %p, i32 noundef signext %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_or_32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoor.w.aqrl a0, a1, (a0) +; CHECK-NEXT: amoor.w.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw or ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw or ptr %p, i32 %v sync_seq_cst, align 4 ret i32 %0 } @@ -157,10 +157,10 @@ define dso_local i64 @test_fetch_and_or_64(ptr nocapture noundef %p, i64 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_or_64: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: amoor.d.aqrl a0, a1, (a0) +; CHECK-NEXT: amoor.d.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw or ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw or ptr %p, i64 %v sync_seq_cst, align 8 ret i64 %0 } @@ -168,10 +168,10 @@ define dso_local signext i32 @test_fetch_and_and_32(ptr nocapture noundef %p, i32 noundef signext %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_and_32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoand.w.aqrl a0, a1, (a0) +; CHECK-NEXT: amoand.w.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw and ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw and ptr %p, i32 %v sync_seq_cst, align 4 ret i32 %0 } @@ -179,10 +179,10 @@ define dso_local i64 @test_fetch_and_and_64(ptr nocapture noundef %p, i64 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_and_64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoand.d.aqrl a0, a1, (a0) +; CHECK-NEXT: amoand.d.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw and ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw and ptr %p, i64 %v sync_seq_cst, align 8 ret i64 %0 } @@ -190,10 +190,10 @@ define dso_local signext i32 @test_fetch_and_xor_32(ptr nocapture noundef %p, i32 noundef signext %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_xor_32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoxor.w.aqrl a0, a1, (a0) +; CHECK-NEXT: amoxor.w.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw xor ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw xor ptr %p, i32 %v sync_seq_cst, align 4 ret i32 %0 } @@ -201,10 +201,10 @@ define dso_local i64 @test_fetch_and_xor_64(ptr nocapture noundef %p, i64 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_fetch_and_xor_64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoxor.d.aqrl a0, a1, (a0) +; CHECK-NEXT: amoxor.d.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw xor ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw xor ptr %p, i64 %v sync_seq_cst, align 8 ret i64 %0 } @@ -212,11 +212,11 @@ define dso_local signext i32 @test_add_and_fetch_32(ptr nocapture noundef %p, i32 noundef signext %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_add_and_fetch_32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoadd.w.aqrl a0, a1, (a0) +; CHECK-NEXT: amoadd.w.aq a0, a1, (a0) ; CHECK-NEXT: addw a0, a0, a1 ; CHECK-NEXT: ret entry: - %0 = atomicrmw add ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw add ptr %p, i32 %v sync_seq_cst, align 4 %1 = add i32 %0, %v ret i32 %1 } @@ -225,11 +225,11 @@ define dso_local i64 @test_add_and_fetch_64(ptr nocapture noundef %p, i64 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_add_and_fetch_64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoadd.d.aqrl a0, a1, (a0) +; CHECK-NEXT: amoadd.d.aq a0, a1, (a0) ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: ret entry: - %0 = atomicrmw add ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw add ptr %p, i64 %v sync_seq_cst, align 8 %1 = add i64 %0, %v ret i64 %1 } @@ -239,11 +239,11 @@ ; CHECK-LABEL: test_sub_and_fetch_32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: neg a2, a1 -; CHECK-NEXT: amoadd.w.aqrl a0, a2, (a0) +; CHECK-NEXT: amoadd.w.aq a0, a2, (a0) ; CHECK-NEXT: subw a0, a0, a1 ; CHECK-NEXT: ret entry: - %0 = atomicrmw sub ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw sub ptr %p, i32 %v sync_seq_cst, align 4 %1 = sub i32 %0, %v ret i32 %1 } @@ -253,11 +253,11 @@ ; CHECK-LABEL: test_sub_and_fetch_64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: neg a2, a1 -; CHECK-NEXT: amoadd.d.aqrl a0, a2, (a0) +; CHECK-NEXT: amoadd.d.aq a0, a2, (a0) ; CHECK-NEXT: sub a0, a0, a1 
; CHECK-NEXT: ret entry: - %0 = atomicrmw sub ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw sub ptr %p, i64 %v sync_seq_cst, align 8 %1 = sub i64 %0, %v ret i64 %1 } @@ -266,11 +266,11 @@ define dso_local signext i32 @test_or_and_fetch_32(ptr nocapture noundef %p, i32 noundef signext %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_or_and_fetch_32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoor.w.aqrl a0, a1, (a0) +; CHECK-NEXT: amoor.w.aq a0, a1, (a0) ; CHECK-NEXT: or a0, a0, a1 ; CHECK-NEXT: ret entry: - %0 = atomicrmw or ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw or ptr %p, i32 %v sync_seq_cst, align 4 %1 = or i32 %0, %v ret i32 %1 } @@ -279,11 +279,11 @@ define dso_local i64 @test_or_and_fetch_64(ptr nocapture noundef %p, i64 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_or_and_fetch_64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoor.d.aqrl a0, a1, (a0) +; CHECK-NEXT: amoor.d.aq a0, a1, (a0) ; CHECK-NEXT: or a0, a0, a1 ; CHECK-NEXT: ret entry: - %0 = atomicrmw or ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw or ptr %p, i64 %v sync_seq_cst, align 8 %1 = or i64 %0, %v ret i64 %1 } @@ -292,11 +292,11 @@ define dso_local signext i32 @test_and_and_fetch_32(ptr nocapture noundef %p, i32 noundef signext %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_and_and_fetch_32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoand.w.aqrl a0, a1, (a0) +; CHECK-NEXT: amoand.w.aq a0, a1, (a0) ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: ret entry: - %0 = atomicrmw and ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw and ptr %p, i32 %v sync_seq_cst, align 4 %1 = and i32 %0, %v ret i32 %1 } @@ -305,11 +305,11 @@ define dso_local i64 @test_and_and_fetch_64(ptr nocapture noundef %p, i64 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_and_and_fetch_64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoand.d.aqrl a0, a1, (a0) +; CHECK-NEXT: amoand.d.aq a0, a1, (a0) ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: ret entry: - %0 = atomicrmw and ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw and ptr %p, i64 %v sync_seq_cst, align 8 %1 = and i64 %0, %v ret i64 %1 } @@ -318,11 +318,11 @@ define dso_local signext i32 @test_xor_and_fetch_32(ptr nocapture noundef %p, i32 noundef signext %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_xor_and_fetch_32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoxor.w.aqrl a0, a1, (a0) +; CHECK-NEXT: amoxor.w.aq a0, a1, (a0) ; CHECK-NEXT: xor a0, a0, a1 ; CHECK-NEXT: ret entry: - %0 = atomicrmw xor ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw xor ptr %p, i32 %v sync_seq_cst, align 4 %1 = xor i32 %0, %v ret i32 %1 } @@ -331,11 +331,11 @@ define dso_local i64 @test_xor_and_fetch_64(ptr nocapture noundef %p, i64 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_xor_and_fetch_64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoxor.d.aqrl a0, a1, (a0) +; CHECK-NEXT: amoxor.d.aq a0, a1, (a0) ; CHECK-NEXT: xor a0, a0, a1 ; CHECK-NEXT: ret entry: - %0 = atomicrmw xor ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw xor ptr %p, i64 %v sync_seq_cst, align 8 %1 = xor i64 %0, %v ret i64 %1 } @@ -346,17 +346,17 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: .LBB20_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lr.w.aqrl a2, (a0) +; CHECK-NEXT: lr.w.aq a2, (a0) ; CHECK-NEXT: and a3, a2, a1 ; CHECK-NEXT: not a3, a3 -; CHECK-NEXT: sc.w.aqrl a3, a3, (a0) +; CHECK-NEXT: sc.w a3, a3, (a0) ; CHECK-NEXT: bnez a3, .LBB20_1 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: and a0, a2, a1 ; CHECK-NEXT: not a0, a0 ; CHECK-NEXT: ret entry: - %0 = atomicrmw nand ptr 
%p, i32 %v seq_cst, align 4 + %0 = atomicrmw nand ptr %p, i32 %v sync_seq_cst, align 4 %1 = and i32 %0, %v %2 = xor i32 %1, -1 ret i32 %2 @@ -368,17 +368,17 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: .LBB21_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lr.d.aqrl a2, (a0) +; CHECK-NEXT: lr.d.aq a2, (a0) ; CHECK-NEXT: and a3, a2, a1 ; CHECK-NEXT: not a3, a3 -; CHECK-NEXT: sc.d.aqrl a3, a3, (a0) +; CHECK-NEXT: sc.d a3, a3, (a0) ; CHECK-NEXT: bnez a3, .LBB21_1 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: and a0, a2, a1 ; CHECK-NEXT: not a0, a0 ; CHECK-NEXT: ret entry: - %0 = atomicrmw nand ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw nand ptr %p, i64 %v sync_seq_cst, align 8 %1 = and i64 %0, %v %2 = xor i64 %1, -1 ret i64 %2 @@ -390,18 +390,18 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: .LBB22_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lr.w.aqrl a3, (a0) +; CHECK-NEXT: lr.w.aq a3, (a0) ; CHECK-NEXT: bne a3, a1, .LBB22_3 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: # in Loop: Header=BB22_1 Depth=1 -; CHECK-NEXT: sc.w.aqrl a4, a2, (a0) +; CHECK-NEXT: sc.w a4, a2, (a0) ; CHECK-NEXT: bnez a4, .LBB22_1 ; CHECK-NEXT: .LBB22_3: # %entry ; CHECK-NEXT: xor a0, a3, a1 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret entry: - %0 = cmpxchg ptr %p, i32 %old, i32 %new seq_cst seq_cst, align 4 + %0 = cmpxchg ptr %p, i32 %old, i32 %new sync_seq_cst sync_seq_cst, align 4 %1 = extractvalue { i32, i1 } %0, 1 ret i1 %1 } @@ -412,18 +412,18 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: .LBB23_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lr.d.aqrl a3, (a0) +; CHECK-NEXT: lr.d.aq a3, (a0) ; CHECK-NEXT: bne a3, a1, .LBB23_3 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: # in Loop: Header=BB23_1 Depth=1 -; CHECK-NEXT: sc.d.aqrl a4, a2, (a0) +; CHECK-NEXT: sc.d a4, a2, (a0) ; CHECK-NEXT: bnez a4, .LBB23_1 ; CHECK-NEXT: .LBB23_3: # %entry ; CHECK-NEXT: xor a0, a3, a1 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret entry: - %0 = cmpxchg ptr %p, i64 %old, i64 %new seq_cst seq_cst, align 8 + %0 = cmpxchg ptr %p, i64 %old, i64 %new sync_seq_cst sync_seq_cst, align 8 %1 = extractvalue { i64, i1 } %0, 1 ret i1 %1 } @@ -434,17 +434,17 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: .LBB24_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lr.w.aqrl a3, (a0) +; CHECK-NEXT: lr.w.aq a3, (a0) ; CHECK-NEXT: bne a3, a1, .LBB24_3 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: # in Loop: Header=BB24_1 Depth=1 -; CHECK-NEXT: sc.w.aqrl a4, a2, (a0) +; CHECK-NEXT: sc.w a4, a2, (a0) ; CHECK-NEXT: bnez a4, .LBB24_1 ; CHECK-NEXT: .LBB24_3: # %entry ; CHECK-NEXT: mv a0, a3 ; CHECK-NEXT: ret entry: - %0 = cmpxchg ptr %p, i32 %old, i32 %new seq_cst seq_cst, align 4 + %0 = cmpxchg ptr %p, i32 %old, i32 %new sync_seq_cst sync_seq_cst, align 4 %1 = extractvalue { i32, i1 } %0, 0 ret i32 %1 } @@ -455,17 +455,17 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: .LBB25_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lr.d.aqrl a3, (a0) +; CHECK-NEXT: lr.d.aq a3, (a0) ; CHECK-NEXT: bne a3, a1, .LBB25_3 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: # in Loop: Header=BB25_1 Depth=1 -; CHECK-NEXT: sc.d.aqrl a4, a2, (a0) +; CHECK-NEXT: sc.d a4, a2, (a0) ; CHECK-NEXT: bnez a4, .LBB25_1 ; CHECK-NEXT: .LBB25_3: # %entry ; CHECK-NEXT: mv a0, a3 ; CHECK-NEXT: ret entry: - %0 = cmpxchg ptr %p, i64 %old, i64 %new seq_cst seq_cst, align 8 + %0 = cmpxchg ptr %p, i64 %old, i64 %new sync_seq_cst sync_seq_cst, align 8 %1 = 
extractvalue { i64, i1 } %0, 0 ret i64 %1 } @@ -474,10 +474,10 @@ define dso_local signext i32 @test_lock_test_and_set_32(ptr nocapture noundef %p, i32 noundef signext %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_lock_test_and_set_32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoswap.w.aqrl a0, a1, (a0) +; CHECK-NEXT: amoswap.w.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw xchg ptr %p, i32 %v seq_cst, align 4 + %0 = atomicrmw xchg ptr %p, i32 %v sync_acq, align 4 ret i32 %0 } @@ -485,10 +485,10 @@ define dso_local i64 @test_lock_test_and_set_64(ptr nocapture noundef %p, i64 noundef %v) local_unnamed_addr #0 { ; CHECK-LABEL: test_lock_test_and_set_64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: amoswap.d.aqrl a0, a1, (a0) +; CHECK-NEXT: amoswap.d.aq a0, a1, (a0) ; CHECK-NEXT: ret entry: - %0 = atomicrmw xchg ptr %p, i64 %v seq_cst, align 8 + %0 = atomicrmw xchg ptr %p, i64 %v sync_acq, align 8 ret i64 %0 } @@ -499,7 +499,7 @@ ; CHECK-NEXT: sw zero, 0(a0) ; CHECK-NEXT: ret entry: - store atomic i32 0, ptr %p release, align 4 + store atomic i32 0, ptr %p sync_rel, align 4 ret void } @@ -510,7 +510,7 @@ ; CHECK-NEXT: sd zero, 0(a0) ; CHECK-NEXT: ret entry: - store atomic i64 0, ptr %p release, align 8 + store atomic i64 0, ptr %p sync_rel, align 8 ret void }
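
Note: the following is a minimal C sketch (not part of the patch) of the legacy __sync builtins that the tests above model, assuming the builtin lowering introduced here; the comments map each call to the sync_* IR ordering used in the corresponding test, and the function and variable names are illustrative only.

#include <stdint.h>

static int32_t counter;
static int32_t lock;

int32_t bump(int32_t v) {
  /* Modeled by test_fetch_and_add_32: atomicrmw add ... sync_seq_cst. */
  return __sync_fetch_and_add(&counter, v);
}

int32_t cas_val(int32_t oldv, int32_t newv) {
  /* Modeled by test_sync_val_cas_32: cmpxchg ... sync_seq_cst sync_seq_cst. */
  return __sync_val_compare_and_swap(&counter, oldv, newv);
}

void acquire_lock(void) {
  /* Modeled by test_lock_test_and_set_32: atomicrmw xchg ... sync_acq. */
  while (__sync_lock_test_and_set(&lock, 1))
    ;
}

void release_lock(void) {
  /* Modeled by the lock-release tests above: store atomic i32 0 ... sync_rel. */
  __sync_lock_release(&lock);
}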