Enable the complex-arithmetic device builtins for OpenMP offloading to AMDGCN.

The builtins header was previously specialised only when __OPENMP_NVPTX__ was
defined. This patch makes every such guard also accept __OPENMP_AMDGCN__, has
the OpenMP <complex> / <complex.h> wrappers define the macro that matches the
device being compiled for (__NVPTX__ or __AMDGCN__), and adds a test checking
that __divdc3, __muldc3, __divsc3 and __mulsc3 are emitted as weak definitions
that call the __ocml_* math functions.

diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h
--- a/clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -16,7 +16,7 @@
 // to work with CUDA and OpenMP target offloading [in C and C++ mode].)
 
 #pragma push_macro("__DEVICE__")
-#ifdef __OPENMP_NVPTX__
+#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
 #pragma omp declare target
 #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
 #else
@@ -26,7 +26,7 @@
 // To make the algorithms available for C and C++ in CUDA and OpenMP we select
 // different but equivalent function versions. TODO: For OpenMP we currently
 // select the native builtins as the overload support for templates is lacking.
-#if !defined(__OPENMP_NVPTX__)
+#if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
 #define _ISNANd std::isnan
 #define _ISNANf std::isnan
 #define _ISINFd std::isinf
@@ -276,7 +276,7 @@
 #undef _fmaxd
 #undef _fmaxf
 
-#ifdef __OPENMP_NVPTX__
+#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
 #pragma omp end declare target
 #endif
 
diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex
--- a/clang/lib/Headers/openmp_wrappers/complex
+++ b/clang/lib/Headers/openmp_wrappers/complex
@@ -17,9 +17,18 @@
 // We require std::math functions in the complex builtins below.
 #include <cmath>
 
+#ifdef __NVPTX__
 #define __OPENMP_NVPTX__
 #include <__clang_cuda_complex_builtins.h>
 #undef __OPENMP_NVPTX__
+#endif // __NVPTX__
+
+#ifdef __AMDGCN__
+#define __OPENMP_AMDGCN__
+#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_AMDGCN__
+#endif // __AMDGCN__
+
 #endif
 
 // Grab the host header too.
@@ -43,4 +52,4 @@
 
 #pragma omp end declare variant
 
-#endif
+#endif // _LIBCPP_STD_VER
diff --git a/clang/lib/Headers/openmp_wrappers/complex.h b/clang/lib/Headers/openmp_wrappers/complex.h
--- a/clang/lib/Headers/openmp_wrappers/complex.h
+++ b/clang/lib/Headers/openmp_wrappers/complex.h
@@ -17,10 +17,19 @@
 // We require math functions in the complex builtins below.
 #include <math.h>
 
+#ifdef __NVPTX__
 #define __OPENMP_NVPTX__
 #include <__clang_cuda_complex_builtins.h>
 #undef __OPENMP_NVPTX__
 #endif
 
+#ifdef __AMDGCN__
+#define __OPENMP_AMDGCN__
+#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_AMDGCN__
+#endif
+
+#endif
+
 // Grab the host header too.
 #include_next <complex.h>
diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK
+
+#include <complex.h>
+
+void test_complex_f64(double _Complex a) {
+// CHECK-LABEL: define {{.*}}test_complex_f64
+#pragma omp target
+  {
+    // CHECK: call { double, double } @__divdc3
+    // CHECK: call { double, double } @__muldc3
+    (void)(a * (a / a));
+  }
+}
+
+// CHECK: define weak {{.*}} @__divdc3
+// CHECK-DAG: call double @__ocml_fabs_f64(
+// CHECK-DAG: call i32 @__ocml_isnan_f64(
+// CHECK-DAG: call i32 @__ocml_isfinite_f64(
+// CHECK-DAG: call double @__ocml_copysign_f64(
+// CHECK-DAG: call double @__ocml_scalbn_f64(
+// CHECK-DAG: call double @__ocml_logb_f64(
+
+// CHECK: define weak {{.*}} @__muldc3
+// CHECK-DAG: call i32 @__ocml_isnan_f64(
+// CHECK-DAG: call i32 @__ocml_isinf_f64(
+// CHECK-DAG: call double @__ocml_copysign_f64(
+
+void test_complex_f32(float _Complex a) {
+// CHECK-LABEL: define {{.*}}test_complex_f32
+#pragma omp target
+  {
+    // CHECK: call [2 x i32] @__divsc3
+    // CHECK: call [2 x i32] @__mulsc3
+    (void)(a * (a / a));
+  }
+}
+
+// CHECK: define weak {{.*}} @__divsc3
+// CHECK-DAG: call float @__ocml_fabs_f32(
+// CHECK-DAG: call i32 @__ocml_isnan_f32(
+// CHECK-DAG: call i32 @__ocml_isfinite_f32(
+// CHECK-DAG: call float @__ocml_copysign_f32(
+// CHECK-DAG: call float @__ocml_scalbn_f32(
+// CHECK-DAG: call float @__ocml_logb_f32(
+
+// CHECK: define weak {{.*}} @__mulsc3
+// CHECK-DAG: call i32 @__ocml_isnan_f32(
+// CHECK-DAG: call i32 @__ocml_isinf_f32(
+// CHECK-DAG: call float @__ocml_copysign_f32(