Index: clang/lib/CodeGen/CGAtomic.cpp =================================================================== --- clang/lib/CodeGen/CGAtomic.cpp +++ clang/lib/CodeGen/CGAtomic.cpp @@ -636,8 +636,11 @@ case AtomicExpr::AO__hip_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_min: - Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min - : llvm::AtomicRMWInst::UMin; + Op = E->getValueType()->isFloatingType() + ? llvm::AtomicRMWInst::FMin + : (E->getValueType()->isSignedIntegerType() + ? llvm::AtomicRMWInst::Min + : llvm::AtomicRMWInst::UMin); break; case AtomicExpr::AO__atomic_max_fetch: @@ -647,8 +650,11 @@ case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_max: - Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max - : llvm::AtomicRMWInst::UMax; + Op = E->getValueType()->isFloatingType() + ? llvm::AtomicRMWInst::FMax + : (E->getValueType()->isSignedIntegerType() + ? llvm::AtomicRMWInst::Max + : llvm::AtomicRMWInst::UMax); break; case AtomicExpr::AO__atomic_and_fetch: @@ -916,9 +922,19 @@ } [[fallthrough]]; case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_sub: case AtomicExpr::AO__atomic_add_fetch: + case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__atomic_min_fetch: case AtomicExpr::AO__atomic_sub_fetch: + case AtomicExpr::AO__c11_atomic_fetch_max: + case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_max: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_min: ShouldCastToIntPtrTy = !MemTy->isFloatingType(); [[fallthrough]]; @@ -934,13 +950,9 @@ case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__c11_atomic_fetch_xor: case AtomicExpr::AO__c11_atomic_fetch_nand: - case AtomicExpr::AO__c11_atomic_fetch_max: - case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_xor: - case AtomicExpr::AO__opencl_atomic_fetch_min: - case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_and: case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_or: @@ -952,12 +964,6 @@ case AtomicExpr::AO__atomic_or_fetch: case AtomicExpr::AO__atomic_xor_fetch: case AtomicExpr::AO__atomic_nand_fetch: - case AtomicExpr::AO__atomic_max_fetch: - case AtomicExpr::AO__atomic_min_fetch: - case AtomicExpr::AO__atomic_fetch_max: - case AtomicExpr::AO__hip_atomic_fetch_max: - case AtomicExpr::AO__atomic_fetch_min: - case AtomicExpr::AO__hip_atomic_fetch_min: Val1 = EmitValToTemp(*this, E->getVal1()); break; } Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -6377,7 +6377,7 @@ Op == AtomicExpr::AO__atomic_store_n || Op == AtomicExpr::AO__atomic_exchange_n || Op == AtomicExpr::AO__atomic_compare_exchange_n; - bool IsAddSub = false; + bool AllowFP = false; switch (Op) { case AtomicExpr::AO__c11_atomic_init: @@ -6403,18 +6403,26 @@ case AtomicExpr::AO__atomic_store_n: Form = Copy; break; - case AtomicExpr::AO__hip_atomic_fetch_add: - case AtomicExpr::AO__hip_atomic_fetch_min: - case AtomicExpr::AO__hip_atomic_fetch_max: - case AtomicExpr::AO__c11_atomic_fetch_add: - case AtomicExpr::AO__c11_atomic_fetch_sub: - case AtomicExpr::AO__opencl_atomic_fetch_add: - case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_sub: case AtomicExpr::AO__atomic_add_fetch: + case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__atomic_min_fetch: case AtomicExpr::AO__atomic_sub_fetch: - IsAddSub = true; + case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__c11_atomic_fetch_max: + case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_max: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_sub: + case AtomicExpr::AO__hip_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_min: + AllowFP = true; Form = Arithmetic; break; case AtomicExpr::AO__c11_atomic_fetch_and: @@ -6437,16 +6445,6 @@ case AtomicExpr::AO__atomic_nand_fetch: Form = Arithmetic; break; - case AtomicExpr::AO__c11_atomic_fetch_min: - case AtomicExpr::AO__c11_atomic_fetch_max: - case AtomicExpr::AO__opencl_atomic_fetch_min: - case AtomicExpr::AO__opencl_atomic_fetch_max: - case AtomicExpr::AO__atomic_min_fetch: - case AtomicExpr::AO__atomic_max_fetch: - case AtomicExpr::AO__atomic_fetch_min: - case AtomicExpr::AO__atomic_fetch_max: - Form = Arithmetic; - break; case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__hip_atomic_exchange: @@ -6548,12 +6546,12 @@ return false; return true; }; - if (IsAddSub && !IsAllowedValueType(ValType)) { + if (AllowFP && !IsAllowedValueType(ValType)) { Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_atomic_int_ptr_or_fp) << IsC11 << Ptr->getType() << Ptr->getSourceRange(); return ExprError(); } - if (!IsAddSub && !ValType->isIntegerType()) { + if (!AllowFP && !ValType->isIntegerType()) { Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_atomic_int) << IsC11 << Ptr->getType() << Ptr->getSourceRange(); return ExprError(); Index: clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu +++ clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu @@ -1,29 +1,98 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ +// RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \ // RUN: -fnative-half-arguments-and-returns | FileCheck %s +// RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \ +// RUN: -fcuda-is-device -target-cpu gfx1100 -fnative-half-type \ +// RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s + +// RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \ +// RUN: -fcuda-is-device -target-cpu gfx940 -fnative-half-type \ +// RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics \ +// RUN: | FileCheck -check-prefix=UNSAFE %s + // REQUIRES: amdgpu-registered-target #include "Inputs/cuda.h" #include -__device__ float ffp1(float *p) { +__global__ void ffp1(float *p) { // CHECK-LABEL: @_Z4ffp1Pf // CHECK: atomicrmw fadd ptr {{.*}} monotonic - return __atomic_fetch_add(p, 1.0f, memory_order_relaxed); + // CHECK: atomicrmw fmax ptr {{.*}} monotonic + // CHECK: atomicrmw fmin ptr {{.*}} monotonic + // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic + // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic + // SAFE: _Z4ffp1Pf + // SAFE: global_atomic_cmpswap + // SAFE: global_atomic_cmpswap + // SAFE: global_atomic_cmpswap + // SAFE: global_atomic_cmpswap + // SAFE: global_atomic_cmpswap + // UNSAFE: _Z4ffp1Pf + // UNSAFE: global_atomic_add_f32 + // UNSAFE: global_atomic_cmpswap + // UNSAFE: global_atomic_cmpswap + // UNSAFE: global_atomic_cmpswap + // UNSAFE: global_atomic_cmpswap + __atomic_fetch_add(p, 1.0f, memory_order_relaxed); + __atomic_fetch_max(p, 1.0f, memory_order_relaxed); + __atomic_fetch_min(p, 1.0f, memory_order_relaxed); + __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP); } -__device__ double ffp2(double *p) { +__global__ void ffp2(double *p) { // CHECK-LABEL: @_Z4ffp2Pd // CHECK: atomicrmw fsub ptr {{.*}} monotonic - return __atomic_fetch_sub(p, 1.0, memory_order_relaxed); + // CHECK: atomicrmw fmax ptr {{.*}} monotonic + // CHECK: atomicrmw fmin ptr {{.*}} monotonic + // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic + // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic + // SAFE: _Z4ffp2Pd + // SAFE: global_atomic_cmpswap_b64 + // SAFE: global_atomic_cmpswap_b64 + // SAFE: global_atomic_cmpswap_b64 + // SAFE: global_atomic_cmpswap_b64 + // SAFE: global_atomic_cmpswap_b64 + // UNSAFE: _Z4ffp2Pd + // UNSAFE: global_atomic_cmpswap_x2 + // UNSAFE: global_atomic_cmpswap_x2 + // UNSAFE: global_atomic_cmpswap_x2 + // UNSAFE: global_atomic_cmpswap_x2 + // UNSAFE: global_atomic_cmpswap_x2 + __atomic_fetch_sub(p, 1.0, memory_order_relaxed); + __atomic_fetch_max(p, 1.0, memory_order_relaxed); + __atomic_fetch_min(p, 1.0, memory_order_relaxed); + __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP); } // long double is the same as double for amdgcn. -__device__ long double ffp3(long double *p) { +__global__ void ffp3(long double *p) { // CHECK-LABEL: @_Z4ffp3Pe // CHECK: atomicrmw fsub ptr {{.*}} monotonic - return __atomic_fetch_sub(p, 1.0L, memory_order_relaxed); + // CHECK: atomicrmw fmax ptr {{.*}} monotonic + // CHECK: atomicrmw fmin ptr {{.*}} monotonic + // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic + // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic + // SAFE: _Z4ffp3Pe + // SAFE: global_atomic_cmpswap_b64 + // SAFE: global_atomic_cmpswap_b64 + // SAFE: global_atomic_cmpswap_b64 + // SAFE: global_atomic_cmpswap_b64 + // SAFE: global_atomic_cmpswap_b64 + // UNSAFE: _Z4ffp3Pe + // UNSAFE: global_atomic_cmpswap_x2 + // UNSAFE: global_atomic_cmpswap_x2 + // UNSAFE: global_atomic_cmpswap_x2 + // UNSAFE: global_atomic_cmpswap_x2 + // UNSAFE: global_atomic_cmpswap_x2 + __atomic_fetch_sub(p, 1.0L, memory_order_relaxed); + __atomic_fetch_max(p, 1.0L, memory_order_relaxed); + __atomic_fetch_min(p, 1.0L, memory_order_relaxed); + __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP); } __device__ double ffp4(double *p, float f) { @@ -39,3 +108,29 @@ // CHECK: atomicrmw fsub ptr {{.*}} monotonic return __atomic_fetch_sub(p, i, memory_order_relaxed); } + +__global__ void ffp6(_Float16 *p) { + // CHECK-LABEL: @_Z4ffp6PDF16 + // CHECK: atomicrmw fadd ptr {{.*}} monotonic + // CHECK: atomicrmw fmax ptr {{.*}} monotonic + // CHECK: atomicrmw fmin ptr {{.*}} monotonic + // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic + // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic + // SAFE: _Z4ffp6PDF16 + // SAFE: global_atomic_cmpswap + // SAFE: global_atomic_cmpswap + // SAFE: global_atomic_cmpswap + // SAFE: global_atomic_cmpswap + // SAFE: global_atomic_cmpswap + // UNSAFE: _Z4ffp6PDF16 + // UNSAFE: global_atomic_cmpswap + // UNSAFE: global_atomic_cmpswap + // UNSAFE: global_atomic_cmpswap + // UNSAFE: global_atomic_cmpswap + // UNSAFE: global_atomic_cmpswap + __atomic_fetch_add(p, 1.0, memory_order_relaxed); + __atomic_fetch_max(p, 1.0, memory_order_relaxed); + __atomic_fetch_min(p, 1.0, memory_order_relaxed); + __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP); +} Index: clang/test/Sema/atomic-ops.c =================================================================== --- clang/test/Sema/atomic-ops.c +++ clang/test/Sema/atomic-ops.c @@ -205,8 +205,8 @@ __atomic_fetch_sub(P, 3, memory_order_seq_cst); __atomic_fetch_sub(D, 3, memory_order_seq_cst); __atomic_fetch_sub(s1, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer, pointer or supported floating point type}} - __atomic_fetch_min(D, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer}} - __atomic_fetch_max(P, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer}} + __atomic_fetch_min(D, 3, memory_order_seq_cst); + __atomic_fetch_max(P, 3, memory_order_seq_cst); __atomic_fetch_max(p, 3); // expected-error {{too few arguments to function call, expected 3, have 2}} __c11_atomic_fetch_and(i, 1, memory_order_seq_cst); Index: clang/test/SemaOpenCL/atomic-ops.cl =================================================================== --- clang/test/SemaOpenCL/atomic-ops.cl +++ clang/test/SemaOpenCL/atomic-ops.cl @@ -61,8 +61,8 @@ __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group); __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group); - __opencl_atomic_fetch_min(f, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}} - __opencl_atomic_fetch_max(f, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}} + __opencl_atomic_fetch_min(f, 1, memory_order_seq_cst, memory_scope_work_group); + __opencl_atomic_fetch_max(f, 1, memory_order_seq_cst, memory_scope_work_group); bool cmpexch_1 = __opencl_atomic_compare_exchange_strong(i, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); bool cmpexch_2 = __opencl_atomic_compare_exchange_strong(p, P, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);