diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h --- a/clang/lib/Headers/__clang_cuda_complex_builtins.h +++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h @@ -15,8 +15,61 @@ // operations. (These implementations come from libc++, and have been modified // to work with CUDA.) -extern "C" inline __device__ double _Complex __muldc3(double __a, double __b, - double __c, double __d) { +#ifdef _OPENMP +#ifdef __cplusplus +#define __DEVICE__ extern "C" inline +#else +#define __DEVICE__ __attribute__((always_inline)) +#endif // __cplusplus +#else +#define __DEVICE__ extern "C" inline __device__ +#endif // _OPENMP + +#ifdef _OPENMP +#define MAX(x, y) fmax((x), (y)) +#define MAXF(x, y) fmaxf((x), (y)) +#define IS_NAN(x) __isnan(x) +#define IS_NANF(x) __isnanf(x) +#define IS_INF(x) __isinf(x) +#define IS_INFF(x) __isinff(x) +#define IS_FINITE(x) __finite(x) +#define IS_FINITEF(x) __finitef(x) +#define COPYSIGN(x, y) copysign((x), (y)) +#define COPYSIGNF(x, y) copysignf((x), (y)) +#define LOGB(x) logb(x) +#define LOGBF(x) logbf(x) +#define ABS(x) fabs(x) +#define ABSF(x) fabsf(x) +#define SCALBN(x, y) scalbn((x), (y)) +#define SCALBNF(x, y) scalbnf((x), (y)) +#else +// Can't use std::max, because that's defined in , and we don't +// want to pull that in for every compile. The CUDA headers define +// ::max(float, float) and ::max(double, double), which is sufficient for us. +#define MAX(x, y) max((x), (y)) +#define MAXF(x, y) max((x), (y)) +#define IS_NAN(x) std::isnan(x) +#define IS_NANF(x) std::isnan(x) +#define IS_INF(x) std::isinf(x) +#define IS_INFF(x) std::isinf(x) +#define IS_FINITE(x) std::isfinite(x) +#define IS_FINITEF(x) std::isfinite(x) +#define COPYSIGN(x, y) std::copysign((x), (y)) +#define COPYSIGNF(x, y) std::copysign((x), (y)) +#define LOGB(x) std::logb(x) +#define LOGBF(x) std::logb(x) +#define ABS(x) std::abs(x) +#define ABSF(x) std::abs(x) +#define SCALBN(x, y) std::scalbn((x), (y)) +#define SCALBNF(x, y) std::scalbn((x), (y)) +#endif // _OPENMP + +#ifdef _OPENMP +#pragma omp declare target +#endif // _OPENMP + +__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, + double __d) { double __ac = __a * __c; double __bd = __b * __d; double __ad = __a * __d; @@ -24,50 +77,49 @@ double _Complex z; __real__(z) = __ac - __bd; __imag__(z) = __ad + __bc; - if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) { + if (IS_NAN(__real__(z)) && IS_NAN(__imag__(z))) { int __recalc = 0; - if (std::isinf(__a) || std::isinf(__b)) { - __a = std::copysign(std::isinf(__a) ? 1 : 0, __a); - __b = std::copysign(std::isinf(__b) ? 1 : 0, __b); - if (std::isnan(__c)) - __c = std::copysign(0, __c); - if (std::isnan(__d)) - __d = std::copysign(0, __d); + if (IS_INF(__a) || IS_INF(__b)) { + __a = COPYSIGN(IS_INF(__a) ? 1 : 0, __a); + __b = COPYSIGN(IS_INF(__b) ? 1 : 0, __b); + if (IS_NAN(__c)) + __c = COPYSIGN(0, __c); + if (IS_NAN(__d)) + __d = COPYSIGN(0, __d); __recalc = 1; } - if (std::isinf(__c) || std::isinf(__d)) { - __c = std::copysign(std::isinf(__c) ? 1 : 0, __c); - __d = std::copysign(std::isinf(__d) ? 1 : 0, __d); - if (std::isnan(__a)) - __a = std::copysign(0, __a); - if (std::isnan(__b)) - __b = std::copysign(0, __b); + if (IS_INF(__c) || IS_INF(__d)) { + __c = COPYSIGN(IS_INF(__c) ? 1 : 0, __c); + __d = COPYSIGN(IS_INF(__d) ? 1 : 0, __d); + if (IS_NAN(__a)) + __a = COPYSIGN(0, __a); + if (IS_NAN(__b)) + __b = COPYSIGN(0, __b); __recalc = 1; } - if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) || - std::isinf(__ad) || std::isinf(__bc))) { - if (std::isnan(__a)) - __a = std::copysign(0, __a); - if (std::isnan(__b)) - __b = std::copysign(0, __b); - if (std::isnan(__c)) - __c = std::copysign(0, __c); - if (std::isnan(__d)) - __d = std::copysign(0, __d); + if (!__recalc && (IS_INF(__ac) || IS_INF(__bd) || + IS_INF(__ad) || IS_INF(__bc))) { + if (IS_NAN(__a)) + __a = COPYSIGN(0, __a); + if (IS_NAN(__b)) + __b = COPYSIGN(0, __b); + if (IS_NAN(__c)) + __c = COPYSIGN(0, __c); + if (IS_NAN(__d)) + __d = COPYSIGN(0, __d); __recalc = 1; } if (__recalc) { // Can't use std::numeric_limits::infinity() -- that doesn't have // a device overload (and isn't constexpr before C++11, naturally). - __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d); - __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c); + __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d); + __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c); } } return z; } -extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b, - float __c, float __d) { +__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) { float __ac = __a * __c; float __bd = __b * __d; float __ad = __a * __d; @@ -75,36 +127,36 @@ float _Complex z; __real__(z) = __ac - __bd; __imag__(z) = __ad + __bc; - if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) { + if (IS_NANF(__real__(z)) && IS_NANF(__imag__(z))) { int __recalc = 0; - if (std::isinf(__a) || std::isinf(__b)) { - __a = std::copysign(std::isinf(__a) ? 1 : 0, __a); - __b = std::copysign(std::isinf(__b) ? 1 : 0, __b); - if (std::isnan(__c)) - __c = std::copysign(0, __c); - if (std::isnan(__d)) - __d = std::copysign(0, __d); + if (IS_INFF(__a) || IS_INFF(__b)) { + __a = COPYSIGNF(IS_INFF(__a) ? 1 : 0, __a); + __b = COPYSIGNF(IS_INFF(__b) ? 1 : 0, __b); + if (IS_NANF(__c)) + __c = COPYSIGNF(0, __c); + if (IS_NANF(__d)) + __d = COPYSIGNF(0, __d); __recalc = 1; } - if (std::isinf(__c) || std::isinf(__d)) { - __c = std::copysign(std::isinf(__c) ? 1 : 0, __c); - __d = std::copysign(std::isinf(__d) ? 1 : 0, __d); - if (std::isnan(__a)) - __a = std::copysign(0, __a); - if (std::isnan(__b)) - __b = std::copysign(0, __b); + if (IS_INFF(__c) || IS_INFF(__d)) { + __c = COPYSIGNF(IS_INFF(__c) ? 1 : 0, __c); + __d = COPYSIGNF(IS_INFF(__d) ? 1 : 0, __d); + if (IS_NANF(__a)) + __a = COPYSIGNF(0, __a); + if (IS_NANF(__b)) + __b = COPYSIGNF(0, __b); __recalc = 1; } - if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) || - std::isinf(__ad) || std::isinf(__bc))) { - if (std::isnan(__a)) - __a = std::copysign(0, __a); - if (std::isnan(__b)) - __b = std::copysign(0, __b); - if (std::isnan(__c)) - __c = std::copysign(0, __c); - if (std::isnan(__d)) - __d = std::copysign(0, __d); + if (!__recalc && (IS_INFF(__ac) || IS_INFF(__bd) || + IS_INFF(__ad) || IS_INFF(__bc))) { + if (IS_NANF(__a)) + __a = COPYSIGNF(0, __a); + if (IS_NANF(__b)) + __b = COPYSIGNF(0, __b); + if (IS_NANF(__c)) + __c = COPYSIGNF(0, __c); + if (IS_NANF(__d)) + __d = COPYSIGNF(0, __d); __recalc = 1; } if (__recalc) { @@ -115,36 +167,33 @@ return z; } -extern "C" inline __device__ double _Complex __divdc3(double __a, double __b, - double __c, double __d) { +__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, + double __d) { int __ilogbw = 0; - // Can't use std::max, because that's defined in , and we don't - // want to pull that in for every compile. The CUDA headers define - // ::max(float, float) and ::max(double, double), which is sufficient for us. - double __logbw = std::logb(max(std::abs(__c), std::abs(__d))); - if (std::isfinite(__logbw)) { + double __logbw = LOGB(MAX(ABS(__c), ABS(__d))); + if (IS_FINITE(__logbw)) { __ilogbw = (int)__logbw; - __c = std::scalbn(__c, -__ilogbw); - __d = std::scalbn(__d, -__ilogbw); + __c = SCALBN(__c, -__ilogbw); + __d = SCALBN(__d, -__ilogbw); } double __denom = __c * __c + __d * __d; double _Complex z; - __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); - __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); - if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) { - if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) { - __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a; - __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b; - } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) && - std::isfinite(__d)) { - __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a); - __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b); - __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); - __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); - } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) && - std::isfinite(__b)) { - __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c); - __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d); + __real__(z) = SCALBN((__a * __c + __b * __d) / __denom, -__ilogbw); + __imag__(z) = SCALBN((__b * __c - __a * __d) / __denom, -__ilogbw); + if (IS_NAN(__real__(z)) && IS_NAN(__imag__(z))) { + if ((__denom == 0.0) && (!IS_NAN(__a) || !IS_NAN(__b))) { + __real__(z) = COPYSIGN(__builtin_huge_val(), __c) * __a; + __imag__(z) = COPYSIGN(__builtin_huge_val(), __c) * __b; + } else if ((IS_INF(__a) || IS_INF(__b)) && IS_FINITE(__c) && + IS_FINITE(__d)) { + __a = COPYSIGN(IS_INF(__a) ? 1.0 : 0.0, __a); + __b = COPYSIGN(IS_INF(__b) ? 1.0 : 0.0, __b); + __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d); + __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d); + } else if (IS_INF(__logbw) && __logbw > 0.0 && IS_FINITE(__a) && + IS_FINITE(__b)) { + __c = COPYSIGN(IS_INF(__c) ? 1.0 : 0.0, __c); + __d = COPYSIGN(IS_INF(__d) ? 1.0 : 0.0, __d); __real__(z) = 0.0 * (__a * __c + __b * __d); __imag__(z) = 0.0 * (__b * __c - __a * __d); } @@ -152,33 +201,32 @@ return z; } -extern "C" inline __device__ float _Complex __divsc3(float __a, float __b, - float __c, float __d) { +__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { int __ilogbw = 0; - float __logbw = std::logb(max(std::abs(__c), std::abs(__d))); - if (std::isfinite(__logbw)) { + float __logbw = LOGBF(MAXF(ABSF(__c), ABSF(__d))); + if (IS_FINITEF(__logbw)) { __ilogbw = (int)__logbw; - __c = std::scalbn(__c, -__ilogbw); - __d = std::scalbn(__d, -__ilogbw); + __c = SCALBNF(__c, -__ilogbw); + __d = SCALBNF(__d, -__ilogbw); } float __denom = __c * __c + __d * __d; float _Complex z; - __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); - __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); - if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) { - if ((__denom == 0) && (!std::isnan(__a) || !std::isnan(__b))) { - __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a; - __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b; - } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) && - std::isfinite(__d)) { - __a = std::copysign(std::isinf(__a) ? 1 : 0, __a); - __b = std::copysign(std::isinf(__b) ? 1 : 0, __b); + __real__(z) = SCALBNF((__a * __c + __b * __d) / __denom, -__ilogbw); + __imag__(z) = SCALBNF((__b * __c - __a * __d) / __denom, -__ilogbw); + if (IS_NANF(__real__(z)) && IS_NANF(__imag__(z))) { + if ((__denom == 0) && (!IS_NANF(__a) || !IS_NANF(__b))) { + __real__(z) = COPYSIGNF(__builtin_huge_valf(), __c) * __a; + __imag__(z) = COPYSIGNF(__builtin_huge_valf(), __c) * __b; + } else if ((IS_INFF(__a) || IS_INFF(__b)) && IS_FINITEF(__c) && + IS_FINITEF(__d)) { + __a = COPYSIGNF(IS_INFF(__a) ? 1 : 0, __a); + __b = COPYSIGNF(IS_INFF(__b) ? 1 : 0, __b); __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); - } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) && - std::isfinite(__b)) { - __c = std::copysign(std::isinf(__c) ? 1 : 0, __c); - __d = std::copysign(std::isinf(__d) ? 1 : 0, __d); + } else if (IS_INFF(__logbw) && __logbw > 0 && IS_FINITEF(__a) && + IS_FINITEF(__b)) { + __c = COPYSIGNF(IS_INFF(__c) ? 1 : 0, __c); + __d = COPYSIGNF(IS_INFF(__d) ? 1 : 0, __d); __real__(z) = 0 * (__a * __c + __b * __d); __imag__(z) = 0 * (__b * __c - __a * __d); } @@ -186,4 +234,23 @@ return z; } +#ifdef _OPENMP +#pragma omp end declare target +#endif // _OPENMP + +#undef IS_NAN(x) +#undef IS_NANF(x) +#undef IS_INF(x) +#undef IS_INFF(x) +#undef IS_FINITE(x) +#undef IS_FINITEF(x) +#undef COPYSIGN(x, y) +#undef COPYSIGNF(x, y) +#undef LOGB(x) +#undef LOGBF(x) +#undef ABS(x) +#undef ABSF(x) +#undef SCALBN(x, y) +#undef SCALBNF(x, y) + #endif // __CLANG_CUDA_COMPLEX_BUILTINS diff --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h --- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h +++ b/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h @@ -26,6 +26,8 @@ #include <__clang_cuda_libdevice_declares.h> /// Provide definitions for these functions. #include <__clang_cuda_device_functions.h> +/// Provide definitions for complex math functions. +#include <__clang_cuda_complex_builtins.h> #undef __CUDA__ diff --git a/clang/test/Headers/nvptx_device_cmath_functions.c b/clang/test/Headers/nvptx_device_cmath_functions.c --- a/clang/test/Headers/nvptx_device_cmath_functions.c +++ b/clang/test/Headers/nvptx_device_cmath_functions.c @@ -8,6 +8,11 @@ #include +// CHECK-YES-DAG: define {{.*}}hidden { double, double } @__muldc3( +// CHECK-YES-DAG: define {{.*}}hidden { float, float } @__mulsc3( +// CHECK-YES-DAG: define {{.*}}hidden { double, double } @__divdc3( +// CHECK-YES-DAG: define {{.*}}hidden { float, float } @__divsc3( + void test_sqrt(double a1) { #pragma omp target { @@ -23,3 +28,11 @@ double l5 = abs((int)a1); } } + +// CHECK-YES-NOT: @{{__mulsc3|__divsc3}}( +void test_cmplx(float _Complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +} diff --git a/clang/test/Headers/nvptx_device_cmath_functions.cpp b/clang/test/Headers/nvptx_device_cmath_functions.cpp --- a/clang/test/Headers/nvptx_device_cmath_functions.cpp +++ b/clang/test/Headers/nvptx_device_cmath_functions.cpp @@ -24,3 +24,15 @@ double l5 = abs((int)a1); } } + +// CHECK-YES-DAG: call { float, float } @__mulsc3( +// CHECK-YES-DAG: call { float, float } @__divsc3( +void test_cmplx(float _Complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +} +// CHECK-YES-DAG: define {{.*}}hidden { float, float } @__mulsc3( +// CHECK-YES-DAG: define {{.*}}hidden { float, float } @__divsc3( + diff --git a/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp b/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp --- a/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp +++ b/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp @@ -24,3 +24,15 @@ double l5 = abs((int)a1); } } + +// CHECK-YES-DAG: call { double, double } @__muldc3( +// CHECK-YES-DAG: call { double, double } @__divdc3( +void test_cmplx(double _Complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +} +// CHECK-YES-DAG: define {{.*}}hidden { double, double } @__muldc3( +// CHECK-YES-DAG: define {{.*}}hidden { double, double } @__divdc3( +