Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7960,6 +7960,9 @@ def err_atomic_op_needs_trivial_copy : Error< "address argument to atomic operation must be a pointer to a " "trivially-copyable type (%0 invalid)">; +def err_atomic_op_needs_atomic_int_ptr_or_fp : Error< + "address argument to atomic operation must be a pointer to %select{|atomic }0" + "integer, pointer or supported floating point type (%1 invalid)">; def err_atomic_op_needs_atomic_int_or_ptr : Error< "address argument to atomic operation must be a pointer to %select{|atomic }0" "integer or pointer (%1 invalid)">; Index: clang/include/clang/Basic/TargetInfo.h =================================================================== --- clang/include/clang/Basic/TargetInfo.h +++ clang/include/clang/Basic/TargetInfo.h @@ -1455,6 +1455,12 @@ /// Whether target allows debuginfo types for decl only variables. virtual bool allowDebugInfoForExternalVar() const { return false; } + /// Whether floating point atomic fetch add/sub is supported. + virtual bool + isFPAtomicFetchAddSubSupported(const llvm::fltSemantics &FS) const { + return false; + } + protected: /// Copy type and layout related info. void copyAuxTarget(const TargetInfo *Aux); Index: clang/lib/Basic/Targets/AMDGPU.h =================================================================== --- clang/lib/Basic/Targets/AMDGPU.h +++ clang/lib/Basic/Targets/AMDGPU.h @@ -389,6 +389,17 @@ void setAuxTarget(const TargetInfo *Aux) override; bool hasExtIntType() const override { return true; } + + bool + isFPAtomicFetchAddSubSupported(const llvm::fltSemantics &FS) const override { + switch (llvm::APFloat::SemanticsToEnum(FS)) { + case llvm::APFloat::S_IEEEsingle: + case llvm::APFloat::S_IEEEdouble: + return true; + default: + return false; + } + } }; } // namespace targets Index: clang/lib/CodeGen/CGAtomic.cpp =================================================================== --- clang/lib/CodeGen/CGAtomic.cpp +++ clang/lib/CodeGen/CGAtomic.cpp @@ -595,21 +595,25 @@ break; case AtomicExpr::AO__atomic_add_fetch: - PostOp = llvm::Instruction::Add; + PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd + : llvm::Instruction::Add; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: - Op = llvm::AtomicRMWInst::Add; + Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd + : llvm::AtomicRMWInst::Add; break; case AtomicExpr::AO__atomic_sub_fetch: - PostOp = llvm::Instruction::Sub; + PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub + : llvm::Instruction::Sub; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_sub: case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: - Op = llvm::AtomicRMWInst::Sub; + Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FSub + : llvm::AtomicRMWInst::Sub; break; case AtomicExpr::AO__atomic_min_fetch: @@ -807,6 +811,7 @@ bool Oversized = getContext().toBits(sizeChars) > MaxInlineWidthInBits; bool Misaligned = (Ptr.getAlignment() % sizeChars) != 0; bool UseLibcall = Misaligned | Oversized; + bool ShouldCastToIntPtrTy = true; if (UseLibcall) { CGM.getDiags().Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned) @@ -876,11 +881,16 @@ EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Temp, Val1Ty)); break; } - LLVM_FALLTHROUGH; + LLVM_FALLTHROUGH; case AtomicExpr::AO__atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_sub: case AtomicExpr::AO__atomic_add_fetch: case AtomicExpr::AO__atomic_sub_fetch: + if (MemTy->isFloatingType()) { + ShouldCastToIntPtrTy = false; + } + LLVM_FALLTHROUGH; + case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__opencl_atomic_store: @@ -921,15 +931,23 @@ LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy); AtomicInfo Atomics(*this, AtomicVal); - Ptr = Atomics.emitCastToAtomicIntPointer(Ptr); - if (Val1.isValid()) Val1 = Atomics.convertToAtomicIntPointer(Val1); - if (Val2.isValid()) Val2 = Atomics.convertToAtomicIntPointer(Val2); - if (Dest.isValid()) - Dest = Atomics.emitCastToAtomicIntPointer(Dest); - else if (E->isCmpXChg()) + if (ShouldCastToIntPtrTy) { + Ptr = Atomics.emitCastToAtomicIntPointer(Ptr); + if (Val1.isValid()) + Val1 = Atomics.convertToAtomicIntPointer(Val1); + if (Val2.isValid()) + Val2 = Atomics.convertToAtomicIntPointer(Val2); + } + if (Dest.isValid()) { + if (ShouldCastToIntPtrTy) + Dest = Atomics.emitCastToAtomicIntPointer(Dest); + } else if (E->isCmpXChg()) Dest = CreateMemTemp(RValTy, "cmpxchg.bool"); - else if (!RValTy->isVoidType()) - Dest = Atomics.emitCastToAtomicIntPointer(Atomics.CreateTempAlloca()); + else if (!RValTy->isVoidType()) { + Dest = Atomics.CreateTempAlloca(); + if (ShouldCastToIntPtrTy) + Dest = Atomics.emitCastToAtomicIntPointer(Dest); + } // Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary . if (UseLibcall) { Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -4703,9 +4703,11 @@ // For an arithmetic operation, the implied arithmetic must be well-formed. if (Form == Arithmetic) { // gcc does not enforce these rules for GNU atomics, but we do so for sanity. - if (IsAddSub && !ValType->isIntegerType() - && !ValType->isPointerType()) { - Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_atomic_int_or_ptr) + if (IsAddSub && !ValType->isIntegerType() && !ValType->isPointerType() && + (!ValType->isFloatingType() || + !Context.getTargetInfo().isFPAtomicFetchAddSubSupported( + Context.getFloatTypeSemantics(ValType)))) { + Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_atomic_int_ptr_or_fp) << IsC11 << Ptr->getType() << Ptr->getSourceRange(); return ExprError(); } @@ -4832,7 +4834,8 @@ // passed by address. For the rest, GNU uses by-address and C11 uses // by-value. assert(Form != Load); - if (Form == Init || (Form == Arithmetic && ValType->isIntegerType())) + if (Form == Init || (Form == Arithmetic && ValType->isIntegerType()) || + (IsAddSub && ValType->isFloatingType())) Ty = ValType; else if (Form == Copy || Form == Xchg) { if (IsPassedByAddress) { Index: clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ +// RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \ +// RUN: -fnative-half-arguments-and-returns | FileCheck %s + +// REQUIRES: amdgpu-registered-target + +#include "Inputs/cuda.h" +#include + +__device__ float ffp1(float *p) { + // CHECK-LABEL: @_Z4ffp1Pf + // CHECK: atomicrmw fadd float* {{.*}} monotonic + return __atomic_fetch_add(p, 1.0f, memory_order_relaxed); +} + +__device__ double ffp2(double *p) { + // CHECK-LABEL: @_Z4ffp2Pd + // CHECK: atomicrmw fsub double* {{.*}} monotonic + return __atomic_fetch_sub(p, 1.0, memory_order_relaxed); +} + +// long double is the same as double for amdgcn. +__device__ long double ffp3(long double *p) { + // CHECK-LABEL: @_Z4ffp3Pe + // CHECK: atomicrmw fsub double* {{.*}} monotonic + return __atomic_fetch_sub(p, 1.0L, memory_order_relaxed); +} Index: clang/test/CodeGenOpenCL/atomic-ops.cl =================================================================== --- clang/test/CodeGenOpenCL/atomic-ops.cl +++ clang/test/CodeGenOpenCL/atomic-ops.cl @@ -1,12 +1,17 @@ -// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa \ +// RUN: | opt -instnamer -S | FileCheck %s // Also test serialization of atomic operations here, to avoid duplicating the test. -// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl -// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa +// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa \ +// RUN: -emit-llvm -o - | opt -instnamer -S | FileCheck %s #ifndef ALREADY_INCLUDED #define ALREADY_INCLUDED +#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable + typedef __INTPTR_TYPE__ intptr_t; typedef int int8 __attribute__((ext_vector_type(8))); @@ -185,6 +190,18 @@ return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group); } +float ff4(global atomic_float *d, float a) { + // CHECK-LABEL: @ff4 + // CHECK: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic + return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); +} + +float ff5(global atomic_double *d, double a) { + // CHECK-LABEL: @ff5 + // CHECK: atomicrmw fadd double addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic + return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); +} + // CHECK-LABEL: @atomic_init_foo void atomic_init_foo() { Index: clang/test/Sema/atomic-ops.c =================================================================== --- clang/test/Sema/atomic-ops.c +++ clang/test/Sema/atomic-ops.c @@ -99,7 +99,8 @@ #define _AS2 __attribute__((address_space(2))) void f(_Atomic(int) *i, const _Atomic(int) *ci, - _Atomic(int*) *p, _Atomic(float) *d, + _Atomic(int*) *p, _Atomic(float) *d, _Atomic(double) *d2, + _Atomic(long double) *d3, int *I, const int *CI, int **P, float *D, struct S *s1, struct S *s2) { __c11_atomic_init(I, 5); // expected-error {{pointer to _Atomic}} @@ -166,13 +167,15 @@ __c11_atomic_fetch_add(i, 1, memory_order_seq_cst); __c11_atomic_fetch_add(p, 1, memory_order_seq_cst); - __c11_atomic_fetch_add(d, 1, memory_order_seq_cst); // expected-error {{must be a pointer to atomic integer or pointer}} + __c11_atomic_fetch_add(d, 1.0f, memory_order_seq_cst); // expected-error {{must be a pointer to atomic integer, pointer or supported floating point type}} + __c11_atomic_fetch_add(d2, 1.0, memory_order_seq_cst); // expected-error {{must be a pointer to atomic integer, pointer or supported floating point type}} + __c11_atomic_fetch_add(d3, 1.0, memory_order_seq_cst); // expected-error {{must be a pointer to atomic integer, pointer or supported floating point type}} - __atomic_fetch_add(i, 3, memory_order_seq_cst); // expected-error {{pointer to integer or pointer}} + __atomic_fetch_add(i, 3, memory_order_seq_cst); // expected-error {{pointer to integer, pointer or supported floating point type}} __atomic_fetch_sub(I, 3, memory_order_seq_cst); __atomic_fetch_sub(P, 3, memory_order_seq_cst); - __atomic_fetch_sub(D, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer or pointer}} - __atomic_fetch_sub(s1, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer or pointer}} + __atomic_fetch_sub(D, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer, pointer or supported floating point type}} + __atomic_fetch_sub(s1, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer, pointer or supported floating point type}} __atomic_fetch_min(D, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer}} __atomic_fetch_max(P, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer}} __atomic_fetch_max(p, 3); // expected-error {{too few arguments to function call, expected 3, have 2}} Index: clang/test/SemaCUDA/amdgpu-atomic-ops.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/amdgpu-atomic-ops.cu @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 %s -verify -fsyntax-only -triple=amdgcn-amd-amdhsa \ +// RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \ +// RUN: -fnative-half-arguments-and-returns + +// REQUIRES: amdgpu-registered-target + +#include "Inputs/cuda.h" +#include + +__device__ _Float16 test_Flot16(_Float16 *p) { + return __atomic_fetch_sub(p, 1.0f16, memory_order_relaxed); + // expected-error@-1 {{address argument to atomic operation must be a pointer to integer, pointer or supported floating point type ('_Float16 *' invalid)}} +} + +__device__ __fp16 test_fp16(__fp16 *p) { + return __atomic_fetch_sub(p, 1.0f16, memory_order_relaxed); + // expected-error@-1 {{address argument to atomic operation must be a pointer to integer, pointer or supported floating point type ('__fp16 *' invalid)}} +} Index: clang/test/SemaOpenCL/atomic-ops.cl =================================================================== --- clang/test/SemaOpenCL/atomic-ops.cl +++ clang/test/SemaOpenCL/atomic-ops.cl @@ -1,10 +1,13 @@ -// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -fsyntax-only -triple=spir64 -// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -fsyntax-only -triple=amdgcn-amdhsa-amd-opencl +// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify=expected,spir \ +// RUN: -fsyntax-only -triple=spir64 +// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -fsyntax-only \ +// RUN: -triple=amdgcn-amd-amdhsa // Basic parsing/Sema tests for __opencl_atomic_* #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable +#pragma OPENCL EXTENSION cl_khr_fp16 : enable typedef __INTPTR_TYPE__ intptr_t; typedef int int8 __attribute__((ext_vector_type(8))); @@ -36,7 +39,7 @@ atomic_int gn; void f(atomic_int *i, const atomic_int *ci, - atomic_intptr_t *p, atomic_float *d, + atomic_intptr_t *p, atomic_float *d, atomic_double *d2, atomic_half *h, // expected-error {{unknown type name 'atomic_half'}} int *I, const int *CI, intptr_t *P, float *D, struct S *s1, struct S *s2, global atomic_int *i_g, local atomic_int *i_l, private atomic_int *i_p, @@ -70,7 +73,8 @@ __opencl_atomic_fetch_add(i, 1, memory_order_seq_cst, memory_scope_work_group); __opencl_atomic_fetch_add(p, 1, memory_order_seq_cst, memory_scope_work_group); - __opencl_atomic_fetch_add(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}} + __opencl_atomic_fetch_add(d, 1.0f, memory_order_seq_cst, memory_scope_work_group); // spir-error {{address argument to atomic operation must be a pointer to atomic integer, pointer or supported floating point type ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}} + __opencl_atomic_fetch_add(d2, 1.0, memory_order_seq_cst, memory_scope_work_group); // spir-error {{address argument to atomic operation must be a pointer to atomic integer, pointer or supported floating point type ('__generic atomic_double *' (aka '__generic _Atomic(double) *') invalid)}} __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group); __opencl_atomic_fetch_and(p, 1, memory_order_seq_cst, memory_scope_work_group); __opencl_atomic_fetch_and(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}