diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -151,6 +151,8 @@ set(openmp_wrapper_files openmp_wrappers/math.h openmp_wrappers/cmath + openmp_wrappers/complex.h + openmp_wrappers/complex openmp_wrappers/__clang_openmp_device_functions.h openmp_wrappers/new ) diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h --- a/clang/lib/Headers/__clang_cuda_complex_builtins.h +++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h @@ -13,10 +13,61 @@ // This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are // libgcc functions that clang assumes are available when compiling c99 complex // operations. (These implementations come from libc++, and have been modified -// to work with CUDA.) +// to work with CUDA and OpenMP target offloading [in C and C++ mode].) -extern "C" inline __device__ double _Complex __muldc3(double __a, double __b, - double __c, double __d) { +#pragma push_macro("__DEVICE__") +#ifdef _OPENMP +#pragma omp declare target +#define __DEVICE__ __attribute__((noinline, nothrow, cold)) +#else +#define __DEVICE__ __device__ inline +#endif + +// Make the algorithms available for C and C++ by selecting the right functions. +#if defined(__cplusplus) +// TODO: In OpenMP mode we cannot overload isinf/isnan/isfinite the way we +// overload all other math functions because old math system headers and not +// always conformant and return an integer instead of a boolean. Until that has +// been addressed we need to work around it. For now, we substituate with the +// calls we would have used to implement those three functions. Note that we +// could use the C alternatives as well. +#define _ISNANd ::__isnan +#define _ISNANf ::__isnanf +#define _ISINFd ::__isinf +#define _ISINFf ::__isinff +#define _ISFINITEd ::__isfinited +#define _ISFINITEf ::__finitef +#define _COPYSIGNd std::copysign +#define _COPYSIGNf std::copysign +#define _SCALBNd std::scalbn +#define _SCALBNf std::scalbn +#define _ABSd std::abs +#define _ABSf std::abs +#define _LOGBd std::logb +#define _LOGBf std::logb +#else +#define _ISNANd isnan +#define _ISNANf isnanf +#define _ISINFd isinf +#define _ISINFf isinff +#define _ISFINITEd isfinite +#define _ISFINITEf isfinitef +#define _COPYSIGNd copysign +#define _COPYSIGNf copysignf +#define _SCALBNd scalbn +#define _SCALBNf scalbnf +#define _ABSd abs +#define _ABSf absf +#define _LOGBd logb +#define _LOGBf logbf +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, + double __d) { double __ac = __a * __c; double __bd = __b * __d; double __ad = __a * __d; @@ -24,50 +75,49 @@ double _Complex z; __real__(z) = __ac - __bd; __imag__(z) = __ad + __bc; - if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) { + if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { int __recalc = 0; - if (std::isinf(__a) || std::isinf(__b)) { - __a = std::copysign(std::isinf(__a) ? 1 : 0, __a); - __b = std::copysign(std::isinf(__b) ? 1 : 0, __b); - if (std::isnan(__c)) - __c = std::copysign(0, __c); - if (std::isnan(__d)) - __d = std::copysign(0, __d); + if (_ISINFd(__a) || _ISINFd(__b)) { + __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a); + __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b); + if (_ISNANd(__c)) + __c = _COPYSIGNd(0, __c); + if (_ISNANd(__d)) + __d = _COPYSIGNd(0, __d); __recalc = 1; } - if (std::isinf(__c) || std::isinf(__d)) { - __c = std::copysign(std::isinf(__c) ? 1 : 0, __c); - __d = std::copysign(std::isinf(__d) ? 1 : 0, __d); - if (std::isnan(__a)) - __a = std::copysign(0, __a); - if (std::isnan(__b)) - __b = std::copysign(0, __b); + if (_ISINFd(__c) || _ISINFd(__d)) { + __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c); + __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d); + if (_ISNANd(__a)) + __a = _COPYSIGNd(0, __a); + if (_ISNANd(__b)) + __b = _COPYSIGNd(0, __b); __recalc = 1; } - if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) || - std::isinf(__ad) || std::isinf(__bc))) { - if (std::isnan(__a)) - __a = std::copysign(0, __a); - if (std::isnan(__b)) - __b = std::copysign(0, __b); - if (std::isnan(__c)) - __c = std::copysign(0, __c); - if (std::isnan(__d)) - __d = std::copysign(0, __d); + if (!__recalc && + (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) { + if (_ISNANd(__a)) + __a = _COPYSIGNd(0, __a); + if (_ISNANd(__b)) + __b = _COPYSIGNd(0, __b); + if (_ISNANd(__c)) + __c = _COPYSIGNd(0, __c); + if (_ISNANd(__d)) + __d = _COPYSIGNd(0, __d); __recalc = 1; } if (__recalc) { // Can't use std::numeric_limits::infinity() -- that doesn't have // a device overload (and isn't constexpr before C++11, naturally). - __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d); - __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c); + __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d); + __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c); } } return z; } -extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b, - float __c, float __d) { +__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) { float __ac = __a * __c; float __bd = __b * __d; float __ad = __a * __d; @@ -75,36 +125,36 @@ float _Complex z; __real__(z) = __ac - __bd; __imag__(z) = __ad + __bc; - if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) { + if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { int __recalc = 0; - if (std::isinf(__a) || std::isinf(__b)) { - __a = std::copysign(std::isinf(__a) ? 1 : 0, __a); - __b = std::copysign(std::isinf(__b) ? 1 : 0, __b); - if (std::isnan(__c)) - __c = std::copysign(0, __c); - if (std::isnan(__d)) - __d = std::copysign(0, __d); + if (_ISINFf(__a) || _ISINFf(__b)) { + __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); + __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); + if (_ISNANf(__c)) + __c = _COPYSIGNf(0, __c); + if (_ISNANf(__d)) + __d = _COPYSIGNf(0, __d); __recalc = 1; } - if (std::isinf(__c) || std::isinf(__d)) { - __c = std::copysign(std::isinf(__c) ? 1 : 0, __c); - __d = std::copysign(std::isinf(__d) ? 1 : 0, __d); - if (std::isnan(__a)) - __a = std::copysign(0, __a); - if (std::isnan(__b)) - __b = std::copysign(0, __b); + if (_ISINFf(__c) || _ISINFf(__d)) { + __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); + __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d); + if (_ISNANf(__a)) + __a = _COPYSIGNf(0, __a); + if (_ISNANf(__b)) + __b = _COPYSIGNf(0, __b); __recalc = 1; } - if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) || - std::isinf(__ad) || std::isinf(__bc))) { - if (std::isnan(__a)) - __a = std::copysign(0, __a); - if (std::isnan(__b)) - __b = std::copysign(0, __b); - if (std::isnan(__c)) - __c = std::copysign(0, __c); - if (std::isnan(__d)) - __d = std::copysign(0, __d); + if (!__recalc && + (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) { + if (_ISNANf(__a)) + __a = _COPYSIGNf(0, __a); + if (_ISNANf(__b)) + __b = _COPYSIGNf(0, __b); + if (_ISNANf(__c)) + __c = _COPYSIGNf(0, __c); + if (_ISNANf(__d)) + __d = _COPYSIGNf(0, __d); __recalc = 1; } if (__recalc) { @@ -115,36 +165,36 @@ return z; } -extern "C" inline __device__ double _Complex __divdc3(double __a, double __b, - double __c, double __d) { +__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, + double __d) { int __ilogbw = 0; // Can't use std::max, because that's defined in , and we don't // want to pull that in for every compile. The CUDA headers define // ::max(float, float) and ::max(double, double), which is sufficient for us. - double __logbw = std::logb(max(std::abs(__c), std::abs(__d))); - if (std::isfinite(__logbw)) { + double __logbw = _LOGBd(max(_ABSd(__c), _ABSd(__d))); + if (_ISFINITEd(__logbw)) { __ilogbw = (int)__logbw; - __c = std::scalbn(__c, -__ilogbw); - __d = std::scalbn(__d, -__ilogbw); + __c = _SCALBNd(__c, -__ilogbw); + __d = _SCALBNd(__d, -__ilogbw); } double __denom = __c * __c + __d * __d; double _Complex z; - __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); - __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); - if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) { - if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) { - __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a; - __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b; - } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) && - std::isfinite(__d)) { - __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a); - __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b); - __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); - __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); - } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) && - std::isfinite(__b)) { - __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c); - __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d); + __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw); + __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw); + if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { + if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) { + __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a; + __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b; + } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) && + _ISFINITEd(__d)) { + __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a); + __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b); + __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d); + __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d); + } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) && + _ISFINITEd(__b)) { + __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c); + __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d); __real__(z) = 0.0 * (__a * __c + __b * __d); __imag__(z) = 0.0 * (__b * __c - __a * __d); } @@ -152,33 +202,32 @@ return z; } -extern "C" inline __device__ float _Complex __divsc3(float __a, float __b, - float __c, float __d) { +__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { int __ilogbw = 0; - float __logbw = std::logb(max(std::abs(__c), std::abs(__d))); - if (std::isfinite(__logbw)) { + float __logbw = _LOGBf(max(_ABSf(__c), _ABSf(__d))); + if (_ISFINITEf(__logbw)) { __ilogbw = (int)__logbw; - __c = std::scalbn(__c, -__ilogbw); - __d = std::scalbn(__d, -__ilogbw); + __c = _SCALBNf(__c, -__ilogbw); + __d = _SCALBNf(__d, -__ilogbw); } float __denom = __c * __c + __d * __d; float _Complex z; - __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); - __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); - if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) { - if ((__denom == 0) && (!std::isnan(__a) || !std::isnan(__b))) { - __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a; - __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b; - } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) && - std::isfinite(__d)) { - __a = std::copysign(std::isinf(__a) ? 1 : 0, __a); - __b = std::copysign(std::isinf(__b) ? 1 : 0, __b); + __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw); + __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw); + if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { + if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) { + __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a; + __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b; + } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) && + _ISFINITEf(__d)) { + __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); + __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); - } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) && - std::isfinite(__b)) { - __c = std::copysign(std::isinf(__c) ? 1 : 0, __c); - __d = std::copysign(std::isinf(__d) ? 1 : 0, __d); + } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) && + _ISFINITEf(__b)) { + __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); + __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d); __real__(z) = 0 * (__a * __c + __b * __d); __imag__(z) = 0 * (__b * __c - __a * __d); } @@ -186,4 +235,29 @@ return z; } +#if defined(__cplusplus) +} // extern "C" +#endif + +#undef _ISNANd +#undef _ISNANf +#undef _ISINFd +#undef _ISINFf +#undef _COPYSIGNd +#undef _COPYSIGNf +#undef _ISFINITEd +#undef _ISFINITEf +#undef _SCALBNd +#undef _SCALBNf +#undef _ABSd +#undef _ABSf +#undef _LOGBd +#undef _LOGBf + +#ifdef _OPENMP +#pragma omp end declare target +#endif + +#pragma pop_macro("__DEVICE__") + #endif // __CLANG_CUDA_COMPLEX_BUILTINS diff --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h --- a/clang/lib/Headers/__clang_cuda_math.h +++ b/clang/lib/Headers/__clang_cuda_math.h @@ -340,6 +340,16 @@ __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); } __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); } +// In C++ mode OpenMP takes the system versions of these because some math +// headers provide the wrong return type. This cannot happen in C and we can and +// want to use the specialized versions right away. +#if defined(_OPENMP) && !defined(__cplusplus) +__DEVICE__ int isinff(float __x) { return __nv_isinff(__x); } +__DEVICE__ int isinf(double __x) { return __nv_isinfd(__x); } +__DEVICE__ int isnanf(float __x) { return __nv_isnanf(__x); } +__DEVICE__ int isnan(double __x) { return __nv_isnand(__x); } +#endif + #pragma pop_macro("__DEVICE__") #pragma pop_macro("__DEVICE_VOID__") #pragma pop_macro("__FAST_OR_SLOW") diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/openmp_wrappers/complex @@ -0,0 +1,25 @@ +/*===-- complex --- OpenMP complex wrapper for target regions --------- c++ -=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_OPENMP_COMPLEX__ +#define __CLANG_OPENMP_COMPLEX__ + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif + +// We require std::math functions in the complex builtins below. +#include + +#define __CUDA__ +#include <__clang_cuda_complex_builtins.h> +#endif + +// Grab the host header too. +#include_next diff --git a/clang/lib/Headers/openmp_wrappers/complex.h b/clang/lib/Headers/openmp_wrappers/complex.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/openmp_wrappers/complex.h @@ -0,0 +1,25 @@ +/*===-- complex --- OpenMP complex wrapper for target regions --------- c++ -=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_OPENMP_COMPLEX_H__ +#define __CLANG_OPENMP_COMPLEX_H__ + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif + +// We require math functions in the complex builtins below. +#include + +#define __CUDA__ +#include <__clang_cuda_complex_builtins.h> +#endif + +// Grab the host header too. +#include_next diff --git a/clang/test/Headers/Inputs/include/cmath b/clang/test/Headers/Inputs/include/cmath --- a/clang/test/Headers/Inputs/include/cmath +++ b/clang/test/Headers/Inputs/include/cmath @@ -49,8 +49,12 @@ float fma(float, float, float); double fmax(double, double); float fmax(float, float); +float max(float, float); +double max(double, double); double fmin(double, double); float fmin(float, float); +float min(float, float); +double min(double, double); double fmod(double, double); float fmod(float, float); int fpclassify(double); diff --git a/clang/test/Headers/Inputs/include/complex b/clang/test/Headers/Inputs/include/complex new file mode 100644 --- /dev/null +++ b/clang/test/Headers/Inputs/include/complex @@ -0,0 +1,301 @@ +#pragma once + +#include + +#define INFINITY (__builtin_inff()) + +namespace std { + +// Taken from libc++ +template +class complex { +public: + typedef _Tp value_type; + +private: + value_type __re_; + value_type __im_; + +public: + complex(const value_type &__re = value_type(), const value_type &__im = value_type()) + : __re_(__re), __im_(__im) {} + template + complex(const complex<_Xp> &__c) + : __re_(__c.real()), __im_(__c.imag()) {} + + value_type real() const { return __re_; } + value_type imag() const { return __im_; } + + void real(value_type __re) { __re_ = __re; } + void imag(value_type __im) { __im_ = __im; } + + complex &operator=(const value_type &__re) { + __re_ = __re; + __im_ = value_type(); + return *this; + } + complex &operator+=(const value_type &__re) { + __re_ += __re; + return *this; + } + complex &operator-=(const value_type &__re) { + __re_ -= __re; + return *this; + } + complex &operator*=(const value_type &__re) { + __re_ *= __re; + __im_ *= __re; + return *this; + } + complex &operator/=(const value_type &__re) { + __re_ /= __re; + __im_ /= __re; + return *this; + } + + template + complex &operator=(const complex<_Xp> &__c) { + __re_ = __c.real(); + __im_ = __c.imag(); + return *this; + } + template + complex &operator+=(const complex<_Xp> &__c) { + __re_ += __c.real(); + __im_ += __c.imag(); + return *this; + } + template + complex &operator-=(const complex<_Xp> &__c) { + __re_ -= __c.real(); + __im_ -= __c.imag(); + return *this; + } + template + complex &operator*=(const complex<_Xp> &__c) { + *this = *this * complex(__c.real(), __c.imag()); + return *this; + } + template + complex &operator/=(const complex<_Xp> &__c) { + *this = *this / complex(__c.real(), __c.imag()); + return *this; + } +}; + +template +inline complex<_Tp> +operator+(const complex<_Tp> &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(__x); + __t += __y; + return __t; +} + +template +inline complex<_Tp> +operator+(const complex<_Tp> &__x, const _Tp &__y) { + complex<_Tp> __t(__x); + __t += __y; + return __t; +} + +template +inline complex<_Tp> +operator+(const _Tp &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(__y); + __t += __x; + return __t; +} + +template +inline complex<_Tp> +operator-(const complex<_Tp> &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(__x); + __t -= __y; + return __t; +} + +template +inline complex<_Tp> +operator-(const complex<_Tp> &__x, const _Tp &__y) { + complex<_Tp> __t(__x); + __t -= __y; + return __t; +} + +template +inline complex<_Tp> +operator-(const _Tp &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(-__y); + __t += __x; + return __t; +} + +template +complex<_Tp> +operator*(const complex<_Tp> &__z, const complex<_Tp> &__w) { + _Tp __a = __z.real(); + _Tp __b = __z.imag(); + _Tp __c = __w.real(); + _Tp __d = __w.imag(); + _Tp __ac = __a * __c; + _Tp __bd = __b * __d; + _Tp __ad = __a * __d; + _Tp __bc = __b * __c; + _Tp __x = __ac - __bd; + _Tp __y = __ad + __bc; + if (std::isnan(__x) && std::isnan(__y)) { + bool __recalc = false; + if (std::isinf(__a) || std::isinf(__b)) { + __a = copysign(std::isinf(__a) ? _Tp(1) : _Tp(0), __a); + __b = copysign(std::isinf(__b) ? _Tp(1) : _Tp(0), __b); + if (std::isnan(__c)) + __c = copysign(_Tp(0), __c); + if (std::isnan(__d)) + __d = copysign(_Tp(0), __d); + __recalc = true; + } + if (std::isinf(__c) || std::isinf(__d)) { + __c = copysign(std::isinf(__c) ? _Tp(1) : _Tp(0), __c); + __d = copysign(std::isinf(__d) ? _Tp(1) : _Tp(0), __d); + if (std::isnan(__a)) + __a = copysign(_Tp(0), __a); + if (std::isnan(__b)) + __b = copysign(_Tp(0), __b); + __recalc = true; + } + if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) || + std::isinf(__ad) || std::isinf(__bc))) { + if (std::isnan(__a)) + __a = copysign(_Tp(0), __a); + if (std::isnan(__b)) + __b = copysign(_Tp(0), __b); + if (std::isnan(__c)) + __c = copysign(_Tp(0), __c); + if (std::isnan(__d)) + __d = copysign(_Tp(0), __d); + __recalc = true; + } + if (__recalc) { + __x = _Tp(INFINITY) * (__a * __c - __b * __d); + __y = _Tp(INFINITY) * (__a * __d + __b * __c); + } + } + return complex<_Tp>(__x, __y); +} + +template +inline complex<_Tp> +operator*(const complex<_Tp> &__x, const _Tp &__y) { + complex<_Tp> __t(__x); + __t *= __y; + return __t; +} + +template +inline complex<_Tp> +operator*(const _Tp &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(__y); + __t *= __x; + return __t; +} + +template +complex<_Tp> +operator/(const complex<_Tp> &__z, const complex<_Tp> &__w) { + int __ilogbw = 0; + _Tp __a = __z.real(); + _Tp __b = __z.imag(); + _Tp __c = __w.real(); + _Tp __d = __w.imag(); + _Tp __logbw = logb(fmax(fabs(__c), fabs(__d))); + if (std::isfinite(__logbw)) { + __ilogbw = static_cast(__logbw); + __c = scalbn(__c, -__ilogbw); + __d = scalbn(__d, -__ilogbw); + } + _Tp __denom = __c * __c + __d * __d; + _Tp __x = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); + _Tp __y = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); + if (std::isnan(__x) && std::isnan(__y)) { + if ((__denom == _Tp(0)) && (!std::isnan(__a) || !std::isnan(__b))) { + __x = copysign(_Tp(INFINITY), __c) * __a; + __y = copysign(_Tp(INFINITY), __c) * __b; + } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) && std::isfinite(__d)) { + __a = copysign(std::isinf(__a) ? _Tp(1) : _Tp(0), __a); + __b = copysign(std::isinf(__b) ? _Tp(1) : _Tp(0), __b); + __x = _Tp(INFINITY) * (__a * __c + __b * __d); + __y = _Tp(INFINITY) * (__b * __c - __a * __d); + } else if (std::isinf(__logbw) && __logbw > _Tp(0) && std::isfinite(__a) && std::isfinite(__b)) { + __c = copysign(std::isinf(__c) ? _Tp(1) : _Tp(0), __c); + __d = copysign(std::isinf(__d) ? _Tp(1) : _Tp(0), __d); + __x = _Tp(0) * (__a * __c + __b * __d); + __y = _Tp(0) * (__b * __c - __a * __d); + } + } + return complex<_Tp>(__x, __y); +} + +template +inline complex<_Tp> +operator/(const complex<_Tp> &__x, const _Tp &__y) { + return complex<_Tp>(__x.real() / __y, __x.imag() / __y); +} + +template +inline complex<_Tp> +operator/(const _Tp &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(__x); + __t /= __y; + return __t; +} + +template +inline complex<_Tp> +operator+(const complex<_Tp> &__x) { + return __x; +} + +template +inline complex<_Tp> +operator-(const complex<_Tp> &__x) { + return complex<_Tp>(-__x.real(), -__x.imag()); +} + +template +inline bool +operator==(const complex<_Tp> &__x, const complex<_Tp> &__y) { + return __x.real() == __y.real() && __x.imag() == __y.imag(); +} + +template +inline bool +operator==(const complex<_Tp> &__x, const _Tp &__y) { + return __x.real() == __y && __x.imag() == 0; +} + +template +inline bool +operator==(const _Tp &__x, const complex<_Tp> &__y) { + return __x == __y.real() && 0 == __y.imag(); +} + +template +inline bool +operator!=(const complex<_Tp> &__x, const complex<_Tp> &__y) { + return !(__x == __y); +} + +template +inline bool +operator!=(const complex<_Tp> &__x, const _Tp &__y) { + return !(__x == __y); +} + +template +inline bool +operator!=(const _Tp &__x, const complex<_Tp> &__y) { + return !(__x == __y); +} + +} // namespace std diff --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib --- a/clang/test/Headers/Inputs/include/cstdlib +++ b/clang/test/Headers/Inputs/include/cstdlib @@ -24,4 +24,8 @@ abs(long long __x) { return __builtin_llabs (__x); } float fabs(float __x) { return __builtin_fabs(__x); } + +float abs(float __x) { return fabs(__x); } +double abs(double __x) { return fabs(__x); } + } diff --git a/clang/test/Headers/nvptx_device_math_complex.c b/clang/test/Headers/nvptx_device_math_complex.c --- a/clang/test/Headers/nvptx_device_math_complex.c +++ b/clang/test/Headers/nvptx_device_math_complex.c @@ -1,10 +1,22 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s +// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s // expected-no-diagnostics -// CHECK-DAG: call { float, float } @__divsc3( -// CHECK-DAG: call { float, float } @__mulsc3( +#ifdef __cplusplus +#include +#else +#include +#endif + +// CHECK-DAG: define {{.*}} @__mulsc3 +// CHECK-DAG: define {{.*}} @__muldc3 +// CHECK-DAG: define {{.*}} @__divsc3 +// CHECK-DAG: define {{.*}} @__divdc3 + +// CHECK-DAG: call float @__nv_scalbnf( void test_scmplx(float _Complex a) { #pragma omp target { @@ -12,9 +24,7 @@ } } - -// CHECK-DAG: call { double, double } @__divdc3( -// CHECK-DAG: call { double, double } @__muldc3( +// CHECK-DAG: call double @__nv_scalbn( void test_dcmplx(double _Complex a) { #pragma omp target { diff --git a/clang/test/Headers/nvptx_device_math_complex.cpp b/clang/test/Headers/nvptx_device_math_complex.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_complex.cpp @@ -0,0 +1,27 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s +// expected-no-diagnostics + +#include + +// CHECK-DAG: define {{.*}} @__mulsc3 +// CHECK-DAG: define {{.*}} @__muldc3 +// CHECK-DAG: define {{.*}} @__divsc3 +// CHECK-DAG: define {{.*}} @__divdc3 + +// CHECK-DAG: call float @__nv_scalbnf( +void test_scmplx(std::complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +} + +// CHECK-DAG: call double @__nv_scalbn( +void test_dcmplx(std::complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +}