diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -6262,6 +6262,7 @@ bool isCmpXChg() const { return getOp() == AO__c11_atomic_compare_exchange_strong || getOp() == AO__c11_atomic_compare_exchange_weak || + getOp() == AO__hip_atomic_compare_exchange_strong || getOp() == AO__opencl_atomic_compare_exchange_strong || getOp() == AO__opencl_atomic_compare_exchange_weak || getOp() == AO__atomic_compare_exchange || @@ -6298,7 +6299,10 @@ auto Kind = (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max) ? AtomicScopeModelKind::OpenCL - : AtomicScopeModelKind::None; + : (Op >= AO__hip_atomic_compare_exchange_strong && + Op <= AO__hip_atomic_fetch_max) + ? AtomicScopeModelKind::HIP + : AtomicScopeModelKind::None; return AtomicScopeModel::create(Kind); } diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -786,6 +786,18 @@ ATOMIC_BUILTIN(__atomic_fetch_min, "v.", "t") ATOMIC_BUILTIN(__atomic_fetch_max, "v.", "t") +// HIP atomic builtins. +// FIXME: Is `__hip_atomic_compare_exchange_n` or +// `__hip_atomic_compare_exchange_weak` needed? +ATOMIC_BUILTIN(__hip_atomic_compare_exchange_strong, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_exchange, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_add, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_and, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_or, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_xor, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_min, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_max, "v.", "t") + #undef ATOMIC_BUILTIN // Non-overloaded atomic builtins. diff --git a/clang/include/clang/Basic/SyncScope.h b/clang/include/clang/Basic/SyncScope.h --- a/clang/include/clang/Basic/SyncScope.h +++ b/clang/include/clang/Basic/SyncScope.h @@ -40,6 +40,11 @@ /// Update getAsString. /// enum class SyncScope { + HIPSingleThread, + HIPWavefront, + HIPWorkgroup, + HIPAgent, + HIPSystem, OpenCLWorkGroup, OpenCLDevice, OpenCLAllSVMDevices, @@ -49,6 +54,16 @@ inline llvm::StringRef getAsString(SyncScope S) { switch (S) { + case SyncScope::HIPSingleThread: + return "hip_singlethread"; + case SyncScope::HIPWavefront: + return "hip_wavefront"; + case SyncScope::HIPWorkgroup: + return "hip_workgroup"; + case SyncScope::HIPAgent: + return "hip_agent"; + case SyncScope::HIPSystem: + return "hip_system"; case SyncScope::OpenCLWorkGroup: return "opencl_workgroup"; case SyncScope::OpenCLDevice: @@ -62,7 +77,7 @@ } /// Defines the kind of atomic scope models. -enum class AtomicScopeModelKind { None, OpenCL }; +enum class AtomicScopeModelKind { None, OpenCL, HIP }; /// Defines the interface for synch scope model. class AtomicScopeModel { @@ -138,6 +153,58 @@ } }; +/// Defines the synch scope model for HIP. +class AtomicScopeHIPModel : public AtomicScopeModel { +public: + /// The enum values match the pre-defined macros + /// __HIP_MEMORY_SCOPE_*, which are used to define memory_scope_* + /// enums in hip-c.h. 
+ enum ID { + SingleThread = 1, + Wavefront = 2, + Workgroup = 3, + Agent = 4, + System = 5, + Last = System + }; + + AtomicScopeHIPModel() {} + + SyncScope map(unsigned S) const override { + switch (static_cast<ID>(S)) { + case SingleThread: + return SyncScope::HIPSingleThread; + case Wavefront: + return SyncScope::HIPWavefront; + case Workgroup: + return SyncScope::HIPWorkgroup; + case Agent: + return SyncScope::HIPAgent; + case System: + return SyncScope::HIPSystem; + } + llvm_unreachable("Invalid language synch scope value"); + } + + bool isValid(unsigned S) const override { + return S >= static_cast<unsigned>(SingleThread) && + S <= static_cast<unsigned>(Last); + } + + ArrayRef<unsigned> getRuntimeValues() const override { + static_assert(Last == System, "Does not include all synch scopes"); + static const unsigned Scopes[] = { + static_cast<unsigned>(SingleThread), static_cast<unsigned>(Wavefront), + static_cast<unsigned>(Workgroup), static_cast<unsigned>(Agent), + static_cast<unsigned>(System)}; + return llvm::makeArrayRef(Scopes); + } + + unsigned getFallBackValue() const override { + return static_cast<unsigned>(System); + } +}; + inline std::unique_ptr<AtomicScopeModel> AtomicScopeModel::create(AtomicScopeModelKind K) { switch (K) { @@ -145,6 +212,8 @@ return std::unique_ptr<AtomicScopeModel>{}; case AtomicScopeModelKind::OpenCL: return std::make_unique<AtomicScopeOpenCLModel>(); + case AtomicScopeModelKind::HIP: + return std::make_unique<AtomicScopeHIPModel>(); } llvm_unreachable("Invalid atomic scope model kind"); } diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -4623,6 +4623,13 @@ case AO__atomic_fetch_max: return 3; + case AO__hip_atomic_exchange: + case AO__hip_atomic_fetch_add: + case AO__hip_atomic_fetch_and: + case AO__hip_atomic_fetch_or: + case AO__hip_atomic_fetch_xor: + case AO__hip_atomic_fetch_min: + case AO__hip_atomic_fetch_max: case AO__opencl_atomic_store: case AO__opencl_atomic_exchange: case AO__opencl_atomic_fetch_add: @@ -4639,6 +4646,7 @@ case AO__c11_atomic_compare_exchange_weak: return 5; + case AO__hip_atomic_compare_exchange_strong: case AO__opencl_atomic_compare_exchange_strong: case AO__opencl_atomic_compare_exchange_weak: case AO__atomic_compare_exchange: diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -533,6 +533,7 @@ llvm_unreachable("Already handled!"); case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2, FailureOrder, Size, Order, Scope); @@ -595,6 +596,7 @@ } case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: @@ -605,6 +607,7 @@ PostOp = llvm::Instruction::Add; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: Op = llvm::AtomicRMWInst::Add; @@ -623,6 +626,7 @@ PostOpMinMax = true; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_min: Op = E->getValueType()->isSignedIntegerType() ?
llvm::AtomicRMWInst::Min @@ -633,6 +637,7 @@ PostOpMinMax = true; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_max: Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max @@ -643,6 +648,7 @@ PostOp = llvm::Instruction::And; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: Op = llvm::AtomicRMWInst::And; @@ -652,6 +658,7 @@ PostOp = llvm::Instruction::Or; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: Op = llvm::AtomicRMWInst::Or; @@ -661,6 +668,7 @@ PostOp = llvm::Instruction::Xor; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_xor: case AtomicExpr::AO__opencl_atomic_fetch_xor: case AtomicExpr::AO__atomic_fetch_xor: Op = llvm::AtomicRMWInst::Xor; @@ -859,6 +867,7 @@ case AtomicExpr::AO__c11_atomic_compare_exchange_strong: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: case AtomicExpr::AO__atomic_compare_exchange_n: @@ -876,6 +885,7 @@ case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__hip_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_sub: if (MemTy->isPointerType()) { @@ -900,6 +910,7 @@ case AtomicExpr::AO__atomic_sub_fetch: case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: case AtomicExpr::AO__opencl_atomic_store: case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_store_n: @@ -909,6 +920,11 @@ case AtomicExpr::AO__c11_atomic_fetch_xor: case AtomicExpr::AO__c11_atomic_fetch_max: case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_xor: @@ -957,12 +973,15 @@ llvm_unreachable("Already handled above with EmitAtomicInit!"); case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_nand: @@ -970,8 +989,11 @@ case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_xor: case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_min: case 
AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_xor: case AtomicExpr::AO__c11_atomic_fetch_max: @@ -1004,6 +1026,8 @@ case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_exchange: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_load: case AtomicExpr::AO__opencl_atomic_store: case AtomicExpr::AO__opencl_atomic_exchange: @@ -1068,6 +1092,7 @@ // int success, int failure) case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__atomic_compare_exchange: @@ -1088,6 +1113,7 @@ // int order) // T __atomic_exchange_N(T *mem, T val, int order) case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: @@ -1121,6 +1147,7 @@ PostOp = llvm::Instruction::Add; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: LibCallName = "__atomic_fetch_add"; @@ -1133,6 +1160,7 @@ PostOp = llvm::Instruction::And; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: LibCallName = "__atomic_fetch_and"; @@ -1145,6 +1173,7 @@ PostOp = llvm::Instruction::Or; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: LibCallName = "__atomic_fetch_or"; @@ -1169,6 +1198,7 @@ PostOp = llvm::Instruction::Xor; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_xor: case AtomicExpr::AO__opencl_atomic_fetch_xor: case AtomicExpr::AO__atomic_fetch_xor: LibCallName = "__atomic_fetch_xor"; @@ -1180,6 +1210,7 @@ LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: LibCallName = E->getValueType()->isSignedIntegerType() ? "__atomic_fetch_min" @@ -1192,6 +1223,7 @@ LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: LibCallName = E->getValueType()->isSignedIntegerType() ? 
"__atomic_fetch_max" diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9158,17 +9158,28 @@ llvm::LLVMContext &Ctx) const { std::string Name; switch (Scope) { + case SyncScope::HIPSingleThread: + Name = "singlethread"; + break; + case SyncScope::HIPWavefront: + case SyncScope::OpenCLSubGroup: + Name = "wavefront"; + break; + case SyncScope::HIPWorkgroup: case SyncScope::OpenCLWorkGroup: Name = "workgroup"; break; + case SyncScope::HIPAgent: case SyncScope::OpenCLDevice: Name = "agent"; break; + case SyncScope::HIPSystem: case SyncScope::OpenCLAllSVMDevices: Name = ""; break; - case SyncScope::OpenCLSubGroup: - Name = "wavefront"; + default: + assert(false && "NOT IMPLEMENTED!"); + break; } if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) { diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -475,8 +475,14 @@ if (LangOpts.HIP) { Builder.defineMacro("__HIP__"); Builder.defineMacro("__HIPCC__"); - if (LangOpts.CUDAIsDevice) + if (LangOpts.CUDAIsDevice) { Builder.defineMacro("__HIP_DEVICE_COMPILE__"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5"); + } } } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -4649,6 +4649,8 @@ "need to update code for modified C11 atomics"); bool IsOpenCL = Op >= AtomicExpr::AO__opencl_atomic_init && Op <= AtomicExpr::AO__opencl_atomic_fetch_max; + bool IsHIP = Op >= AtomicExpr::AO__hip_atomic_compare_exchange_strong && + Op <= AtomicExpr::AO__hip_atomic_fetch_max; bool IsC11 = (Op >= AtomicExpr::AO__c11_atomic_init && Op <= AtomicExpr::AO__c11_atomic_fetch_min) || IsOpenCL; @@ -4681,6 +4683,9 @@ Form = Copy; break; + case AtomicExpr::AO__hip_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__c11_atomic_fetch_sub: case AtomicExpr::AO__opencl_atomic_fetch_add: @@ -4694,6 +4699,9 @@ case AtomicExpr::AO__c11_atomic_fetch_and: case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_xor: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_xor: @@ -4717,6 +4725,7 @@ break; case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: Form = Xchg; @@ -4728,6 +4737,7 @@ case AtomicExpr::AO__c11_atomic_compare_exchange_strong: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: Form = C11CmpXchg; @@ -4740,7 +4750,7 @@ } unsigned AdjustedNumArgs = NumArgs[Form]; - if (IsOpenCL && Op != AtomicExpr::AO__opencl_atomic_init) + if 
((IsOpenCL || IsHIP) && Op != AtomicExpr::AO__opencl_atomic_init) ++AdjustedNumArgs; // Check we have the right number of arguments. if (Args.size() < AdjustedNumArgs) { @@ -4864,7 +4874,7 @@ // arguments are actually passed as pointers. QualType ByValType = ValType; // 'CP' bool IsPassedByAddress = false; - if (!IsC11 && !IsN) { + if (!IsC11 && !IsHIP && !IsN) { ByValType = Ptr->getType(); IsPassedByAddress = true; } diff --git a/clang/test/CodeGenCUDA/atomic-ops.cu b/clang/test/CodeGenCUDA/atomic-ops.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/atomic-ops.cu @@ -0,0 +1,302 @@ +// RUN: %clang_cc1 -x hip -std=c++11 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s +#include "Inputs/cuda.h" + +// CHECK-LABEL: @_Z24atomic32_op_singlethreadPiii +// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +__device__ int atomic32_op_singlethread(int *ptr, int val, int desired) { + bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + return flag ? 
val : desired; +} + +// CHECK-LABEL: @_Z25atomicu32_op_singlethreadPjjj +// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") +__device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned int val, unsigned int desired) { + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + return val; +} + +// CHECK-LABEL: @_Z21atomic32_op_wavefrontPiii +// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +__device__ int atomic32_op_wavefront(int *ptr, int val, int desired) { + bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + return flag ? 
val : desired; +} + +// CHECK-LABEL: @_Z22atomicu32_op_wavefrontPjjj +// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") +__device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int val, unsigned int desired) { + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + return val; +} + +// CHECK-LABEL: @_Z21atomic32_op_workgroupPiii +// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +__device__ int atomic32_op_workgroup(int *ptr, int val, int desired) { + bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + return flag ? 
val : desired; +} + +// CHECK-LABEL: @_Z22atomicu32_op_workgroupPjjj +// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") +__device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int val, unsigned int desired) { + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + return val; +} + +// CHECK-LABEL: @_Z17atomic32_op_agentPiii +// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +__device__ int atomic32_op_agent(int *ptr, int val, int desired) { + bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + return flag ? 
val : desired; +} + +// CHECK-LABEL: @_Z18atomicu32_op_agentPjjj +// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") +__device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val, unsigned int desired) { + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + return val; +} + +// CHECK-LABEL: @_Z18atomic32_op_systemPiii +// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +__device__ int atomic32_op_system(int *ptr, int val, int desired) { + bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + return flag ? 
val : desired; +} + +// CHECK-LABEL: @_Z19atomicu32_op_systemPjjj +// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") +__device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val, unsigned int desired) { + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + return val; +} + +// CHECK-LABEL: @_Z24atomic64_op_singlethreadPxxx +// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +__device__ long long atomic64_op_singlethread(long long *ptr, long long val, long long desired) { + bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + return flag ? 
val : desired; +} + +// CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyyy +// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") +__device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long val, unsigned long long desired) { + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); + return val; +} + +// CHECK-LABEL: @_Z21atomic64_op_wavefrontPxxx +// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +__device__ long long atomic64_op_wavefront(long long *ptr, long long val, long long desired) { + bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + return flag ? 
val : desired; +} + +// CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyyy +// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") +__device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long val, unsigned long long desired) { + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); + return val; +} + +// CHECK-LABEL: @_Z21atomic64_op_workgroupPxxx +// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +__device__ long long atomic64_op_workgroup(long long *ptr, long long val, long long desired) { + bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + return flag ? 
val : desired; +} + +// CHECK-LABEL: @_Z22atomicu64_op_workgroupPyyy +// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") +__device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long val, unsigned long long desired) { + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); + return val; +} + +// CHECK-LABEL: @_Z17atomic64_op_agentPxxx +// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +__device__ long long atomic64_op_agent(long long *ptr, long long val, long long desired) { + bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + return flag ? 
val : desired; +} + +// CHECK-LABEL: @_Z18atomicu64_op_agentPyyy +// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") +__device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long val, unsigned long long desired) { + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + return val; +} + +// CHECK-LABEL: @_Z18atomic64_op_systemPxxx +// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +__device__ long long atomic64_op_system(long long *ptr, long long val, long long desired) { + bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + return flag ? val : desired; +} + +// CHECK-LABEL: @_Z19atomicu64_op_systemPyyy +// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") +__device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long val, unsigned long long desired) { + val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); + return val; +}