diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -214,8 +214,8 @@ TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "t", "gfx90a-insts") -TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts") -TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "atomic-buffer-global-pk-add-f16-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmin_f64, "dd*1d", "t", "gfx90a-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmax_f64, "dd*1d", "t", "gfx90a-insts") diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -4477,6 +4477,21 @@ bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + + if (BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16 || + BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32) { + auto TargetID = Context.getTargetInfo().getTargetID(); + if (!TargetID || TargetID->find("gfx908") == std::string::npos) + return false; + + // On GFX908/MI100 the global atomic fadd f32/v2f16 instructions do not + // return the old value from memory. Overriding the builtin's return type + // to 'void' makes clang emit an error whenever a caller tries to use the + // builtin's result. 
+ TheCall->setType(Context.VoidTy); + return false; + } + // position of memory order and scope arguments in the builtin unsigned OrderIndex, ScopeIndex; switch (BuiltinID) { diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -70,11 +70,11 @@ // GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX908: "target-features"="+16-bit-insts,+atomic-fadd-no-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX90A: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-no-rtn-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-no-rtn-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" diff 
--git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f16-gfx9-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f16-gfx9-err.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f16-gfx9-err.cl @@ -0,0 +1,12 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -verify -S -emit-llvm -o - %s + +typedef half __attribute__((ext_vector_type(2))) half2; + +half2 test_global_fadd_f16(__global half2 *addrh2, half2 xh2) { + half2 *rtn; + + __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts}} + *rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts}} + return __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f32-gfx9-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f32-gfx9-err.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f32-gfx9-err.cl @@ -0,0 +1,10 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -verify -S -emit-llvm -o - %s + +float test_global_fadd_f32(__global float *addr, float x) { + float *rtn; + + __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts}} + *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // 
expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts}} + return __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl @@ -2,21 +2,12 @@ // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \ // RUN: -verify -S -o - %s -typedef half __attribute__((ext_vector_type(2))) half2; - -void test_global_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2, - __global float *addrf, float xf, - __global double *addr, double x) { - half2 *half_rtn; - float *fp_rtn; +void test_global_fadd(__global double *addr, double x) { double *rtn; - *half_rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}} - *fp_rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts}} *rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f64' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_global_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmax_f64' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_global_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmin_f64' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f64' 
needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fmin_f64' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fmax_f64' needs target feature gfx90a-insts}} - __builtin_amdgcn_ds_atomic_fadd_v2f16(addrh2l, xh2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2f16' needs target feature atomic-ds-pk-add-16-insts}} } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-noret-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-noret-err.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-noret-err.cl @@ -0,0 +1,40 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \ +// RUN: -verify -S -o - %s + +typedef half __attribute__((ext_vector_type(2))) half2; + +half2 func1(half2 x); // expected-note{{passing argument to parameter 'x' here}} +float func2(float x); // expected-note{{passing argument to parameter 'x' here}} + +half2 test_global_fadd_v2f16(__global half2 *addrh2, half2 xh2) { + half2 *rtn1; + + half2 *rtn2 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{initializing '__generic half2 *__private' with an expression of incompatible type 'void'}} + *rtn1 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{assigning to '__generic half2' (vector of 2 'half' values) from incompatible type 'void'}} + *rtn1 = *rtn1 + __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{cannot convert between vector and non-scalar values ('half2' (vector of 2 'half' values) and 'void')}} + func1(__builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2)); // expected-error{{passing 'void' to parameter of incompatible type 'half2' (vector of 2 'half' values)}} + + half2 rtn3; + 
half2 rtn4 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{initializing '__private half2' (vector of 2 'half' values) with an expression of incompatible type 'void'}} + rtn3 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{assigning to '__private half2' (vector of 2 'half' values) from incompatible type 'void'}} + rtn3 = rtn3 + __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{cannot convert between vector and non-scalar values ('half2' (vector of 2 'half' values) and 'void')}} + + return __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{returning 'void' from a function with incompatible result type 'half2' (vector of 2 'half' values)}} +} + +float test_global_fadd_f32(__global float *addrf, float xf) { + float *rtn1; + + float *rtn2 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{initializing '__generic float *__private' with an expression of incompatible type 'void'}} + *rtn1 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{assigning to '__generic float' from incompatible type 'void'}} + *rtn1 = *rtn1 + __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{invalid operands to binary expression ('__generic float' and 'void')}} + func2(__builtin_amdgcn_global_atomic_fadd_f32(addrf, xf)); // expected-error{{passing 'void' to parameter of incompatible type 'float'}} + + float rtn3; + float rtn4 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{initializing '__private float' with an expression of incompatible type 'void'}} + rtn3 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{assigning to '__private float' from incompatible type 'void'}} + rtn3 = rtn3 + __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{invalid operands to binary expression ('__private float' and 'void')}} + + return __builtin_amdgcn_global_atomic_fadd_f32(addrf, 
xf); // expected-error{{returning 'void' from a function with incompatible result type 'float'}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx908.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx908.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx908.cl @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \ +// RUN: %s -S -emit-llvm -o - | FileCheck %s -check-prefix=CHECK + +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \ +// RUN: -S -o - %s | FileCheck -check-prefix=GFX908 %s + +// REQUIRES: amdgpu-registered-target + +typedef half __attribute__((ext_vector_type(2))) half2; + +// CHECK-LABEL: test_global_add_half2 +// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %0, <2 x half> %1) +// GFX908-LABEL: test_global_add_half2 +// GFX908: global_atomic_pk_add_f16 v[0:1], v2, off +half2 test_global_add_half2(__global half2 *addr, half2 x) { + __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x); + return *addr; +} + +// CHECK-LABEL: test_global_add_float +// CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %0, float %1) +// GFX908-LABEL: test_global_add_float +// GFX908: global_atomic_add_f32 v[0:1], v2, off +float test_global_add_float(__global float *addr, float x) { + __builtin_amdgcn_global_atomic_fadd_f32(addr, x); + return *addr; +} diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -340,6 +340,7 @@ Features["dot5-insts"] = true; Features["dot6-insts"] = true; Features["mai-insts"] = true; + Features["atomic-fadd-no-rtn-insts"] = true; [[fallthrough]]; case GK_GFX906: Features["dl-insts"] = true;