diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -147,6 +147,7 @@ set(openmp_wrapper_files openmp_wrappers/math.h openmp_wrappers/cmath + openmp_wrappers/complex openmp_wrappers/__clang_openmp_device_functions.h openmp_wrappers/new ) diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h --- a/clang/lib/Headers/__clang_cuda_complex_builtins.h +++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h @@ -15,8 +15,20 @@ // operations. (These implementations come from libc++, and have been modified // to work with CUDA.) -extern "C" inline __device__ double _Complex __muldc3(double __a, double __b, - double __c, double __d) { +#pragma push_macro("__DEVICE__") +#ifdef _OPENMP +#pragma omp declare target +#define __DEVICE__ __attribute__((nothrow)) +#else +#define __DEVICE__ __device__ inline +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, + double __d) { double __ac = __a * __c; double __bd = __b * __d; double __ad = __a * __d; @@ -59,15 +71,14 @@ if (__recalc) { // Can't use std::numeric_limits::infinity() -- that doesn't have // a device overload (and isn't constexpr before C++11, naturally). - __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d); - __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c); + __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d); + __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c); } } return z; } -extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b, - float __c, float __d) { +__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) { float __ac = __a * __c; float __bd = __b * __d; float __ad = __a * __d; @@ -115,8 +126,8 @@ return z; } -extern "C" inline __device__ double _Complex __divdc3(double __a, double __b, - double __c, double __d) { +__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, + double __d) { int __ilogbw = 0; // Can't use std::max, because that's defined in , and we don't // want to pull that in for every compile. The CUDA headers define @@ -133,14 +144,14 @@ __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) { if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) { - __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a; - __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b; + __real__(z) = std::copysign(__builtin_huge_val(), __c) * __a; + __imag__(z) = std::copysign(__builtin_huge_val(), __c) * __b; } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) && std::isfinite(__d)) { __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a); __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b); - __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); - __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); + __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d); + __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d); } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) && std::isfinite(__b)) { __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c); @@ -152,8 +163,7 @@ return z; } -extern "C" inline __device__ float _Complex __divsc3(float __a, float __b, - float __c, float __d) { +__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { int __ilogbw = 0; float __logbw = std::logb(max(std::abs(__c), std::abs(__d))); if (std::isfinite(__logbw)) { @@ -186,4 +196,14 @@ return z; } +#if defined(__cplusplus) +} // extern "C" +#endif + +#ifdef _OPENMP +#pragma omp end declare target +#endif + +#pragma pop_macro("__DEVICE__") + #endif // __CLANG_CUDA_COMPLEX_BUILTINS diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/openmp_wrappers/complex @@ -0,0 +1,30 @@ +/*===-- complex --- OpenMP complex wrapper for target regions --------- c++ -=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_OPENMP_COMPLEX_H__ +#define __CLANG_OPENMP_COMPLEX_H__ + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif + +// We require std::math functions in the complex builtins below. +#include + +#pragma omp begin declare variant match( \ + device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) + +#define __CUDA__ +#include <__clang_cuda_complex_builtins.h> +#endif + +#pragma omp end declare variant + +// Grab the host header too. +#include_next diff --git a/clang/test/Headers/Inputs/include/complex b/clang/test/Headers/Inputs/include/complex new file mode 100644 --- /dev/null +++ b/clang/test/Headers/Inputs/include/complex @@ -0,0 +1,301 @@ +#pragma once + +#include + +#define INFINITY (__builtin_inff()) + +namespace std { + +// Taken from libc++ +template +class complex { +public: + typedef _Tp value_type; + +private: + value_type __re_; + value_type __im_; + +public: + complex(const value_type &__re = value_type(), const value_type &__im = value_type()) + : __re_(__re), __im_(__im) {} + template + complex(const complex<_Xp> &__c) + : __re_(__c.real()), __im_(__c.imag()) {} + + value_type real() const { return __re_; } + value_type imag() const { return __im_; } + + void real(value_type __re) { __re_ = __re; } + void imag(value_type __im) { __im_ = __im; } + + complex &operator=(const value_type &__re) { + __re_ = __re; + __im_ = value_type(); + return *this; + } + complex &operator+=(const value_type &__re) { + __re_ += __re; + return *this; + } + complex &operator-=(const value_type &__re) { + __re_ -= __re; + return *this; + } + complex &operator*=(const value_type &__re) { + __re_ *= __re; + __im_ *= __re; + return *this; + } + complex &operator/=(const value_type &__re) { + __re_ /= __re; + __im_ /= __re; + return *this; + } + + template + complex &operator=(const complex<_Xp> &__c) { + __re_ = __c.real(); + __im_ = __c.imag(); + return *this; + } + template + complex &operator+=(const complex<_Xp> &__c) { + __re_ += __c.real(); + __im_ += __c.imag(); + return *this; + } + template + complex &operator-=(const complex<_Xp> &__c) { + __re_ -= __c.real(); + __im_ -= __c.imag(); + return *this; + } + template + complex &operator*=(const complex<_Xp> &__c) { + *this = *this * complex(__c.real(), __c.imag()); + return *this; + } + template + complex &operator/=(const complex<_Xp> &__c) { + *this = *this / complex(__c.real(), __c.imag()); + return *this; + } +}; + +template +inline complex<_Tp> +operator+(const complex<_Tp> &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(__x); + __t += __y; + return __t; +} + +template +inline complex<_Tp> +operator+(const complex<_Tp> &__x, const _Tp &__y) { + complex<_Tp> __t(__x); + __t += __y; + return __t; +} + +template +inline complex<_Tp> +operator+(const _Tp &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(__y); + __t += __x; + return __t; +} + +template +inline complex<_Tp> +operator-(const complex<_Tp> &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(__x); + __t -= __y; + return __t; +} + +template +inline complex<_Tp> +operator-(const complex<_Tp> &__x, const _Tp &__y) { + complex<_Tp> __t(__x); + __t -= __y; + return __t; +} + +template +inline complex<_Tp> +operator-(const _Tp &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(-__y); + __t += __x; + return __t; +} + +template +complex<_Tp> +operator*(const complex<_Tp> &__z, const complex<_Tp> &__w) { + _Tp __a = __z.real(); + _Tp __b = __z.imag(); + _Tp __c = __w.real(); + _Tp __d = __w.imag(); + _Tp __ac = __a * __c; + _Tp __bd = __b * __d; + _Tp __ad = __a * __d; + _Tp __bc = __b * __c; + _Tp __x = __ac - __bd; + _Tp __y = __ad + __bc; + if (std::isnan(__x) && std::isnan(__y)) { + bool __recalc = false; + if (std::isinf(__a) || std::isinf(__b)) { + __a = copysign(std::isinf(__a) ? _Tp(1) : _Tp(0), __a); + __b = copysign(std::isinf(__b) ? _Tp(1) : _Tp(0), __b); + if (std::isnan(__c)) + __c = copysign(_Tp(0), __c); + if (std::isnan(__d)) + __d = copysign(_Tp(0), __d); + __recalc = true; + } + if (std::isinf(__c) || std::isinf(__d)) { + __c = copysign(std::isinf(__c) ? _Tp(1) : _Tp(0), __c); + __d = copysign(std::isinf(__d) ? _Tp(1) : _Tp(0), __d); + if (std::isnan(__a)) + __a = copysign(_Tp(0), __a); + if (std::isnan(__b)) + __b = copysign(_Tp(0), __b); + __recalc = true; + } + if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) || + std::isinf(__ad) || std::isinf(__bc))) { + if (std::isnan(__a)) + __a = copysign(_Tp(0), __a); + if (std::isnan(__b)) + __b = copysign(_Tp(0), __b); + if (std::isnan(__c)) + __c = copysign(_Tp(0), __c); + if (std::isnan(__d)) + __d = copysign(_Tp(0), __d); + __recalc = true; + } + if (__recalc) { + __x = _Tp(INFINITY) * (__a * __c - __b * __d); + __y = _Tp(INFINITY) * (__a * __d + __b * __c); + } + } + return complex<_Tp>(__x, __y); +} + +template +inline complex<_Tp> +operator*(const complex<_Tp> &__x, const _Tp &__y) { + complex<_Tp> __t(__x); + __t *= __y; + return __t; +} + +template +inline complex<_Tp> +operator*(const _Tp &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(__y); + __t *= __x; + return __t; +} + +template +complex<_Tp> +operator/(const complex<_Tp> &__z, const complex<_Tp> &__w) { + int __ilogbw = 0; + _Tp __a = __z.real(); + _Tp __b = __z.imag(); + _Tp __c = __w.real(); + _Tp __d = __w.imag(); + _Tp __logbw = logb(fmax(fabs(__c), fabs(__d))); + if (std::isfinite(__logbw)) { + __ilogbw = static_cast(__logbw); + __c = scalbn(__c, -__ilogbw); + __d = scalbn(__d, -__ilogbw); + } + _Tp __denom = __c * __c + __d * __d; + _Tp __x = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); + _Tp __y = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); + if (std::isnan(__x) && std::isnan(__y)) { + if ((__denom == _Tp(0)) && (!std::isnan(__a) || !std::isnan(__b))) { + __x = copysign(_Tp(INFINITY), __c) * __a; + __y = copysign(_Tp(INFINITY), __c) * __b; + } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) && std::isfinite(__d)) { + __a = copysign(std::isinf(__a) ? _Tp(1) : _Tp(0), __a); + __b = copysign(std::isinf(__b) ? _Tp(1) : _Tp(0), __b); + __x = _Tp(INFINITY) * (__a * __c + __b * __d); + __y = _Tp(INFINITY) * (__b * __c - __a * __d); + } else if (std::isinf(__logbw) && __logbw > _Tp(0) && std::isfinite(__a) && std::isfinite(__b)) { + __c = copysign(std::isinf(__c) ? _Tp(1) : _Tp(0), __c); + __d = copysign(std::isinf(__d) ? _Tp(1) : _Tp(0), __d); + __x = _Tp(0) * (__a * __c + __b * __d); + __y = _Tp(0) * (__b * __c - __a * __d); + } + } + return complex<_Tp>(__x, __y); +} + +template +inline complex<_Tp> +operator/(const complex<_Tp> &__x, const _Tp &__y) { + return complex<_Tp>(__x.real() / __y, __x.imag() / __y); +} + +template +inline complex<_Tp> +operator/(const _Tp &__x, const complex<_Tp> &__y) { + complex<_Tp> __t(__x); + __t /= __y; + return __t; +} + +template +inline complex<_Tp> +operator+(const complex<_Tp> &__x) { + return __x; +} + +template +inline complex<_Tp> +operator-(const complex<_Tp> &__x) { + return complex<_Tp>(-__x.real(), -__x.imag()); +} + +template +inline bool +operator==(const complex<_Tp> &__x, const complex<_Tp> &__y) { + return __x.real() == __y.real() && __x.imag() == __y.imag(); +} + +template +inline bool +operator==(const complex<_Tp> &__x, const _Tp &__y) { + return __x.real() == __y && __x.imag() == 0; +} + +template +inline bool +operator==(const _Tp &__x, const complex<_Tp> &__y) { + return __x == __y.real() && 0 == __y.imag(); +} + +template +inline bool +operator!=(const complex<_Tp> &__x, const complex<_Tp> &__y) { + return !(__x == __y); +} + +template +inline bool +operator!=(const complex<_Tp> &__x, const _Tp &__y) { + return !(__x == __y); +} + +template +inline bool +operator!=(const _Tp &__x, const complex<_Tp> &__y) { + return !(__x == __y); +} + +} // namespace std diff --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib --- a/clang/test/Headers/Inputs/include/cstdlib +++ b/clang/test/Headers/Inputs/include/cstdlib @@ -24,4 +24,8 @@ abs(long long __x) { return __builtin_llabs (__x); } float fabs(float __x) { return __builtin_fabs(__x); } + +float abs(float __x) { return fabs(__x); } +double abs(double __x) { return fabs(__x); } + } diff --git a/clang/test/Headers/Inputs/include/math.h b/clang/test/Headers/Inputs/include/math.h --- a/clang/test/Headers/Inputs/include/math.h +++ b/clang/test/Headers/Inputs/include/math.h @@ -107,6 +107,10 @@ long lroundf(float __a); int max(int __a, int __b); int min(int __a, int __b); +float max(float __a, float __b); +float min(float __a, float __b); +double max(double __a, double __b); +double min(double __a, double __b); double modf(double __a, double *__b); float modff(float __a, float *__b); double nearbyint(double __a); diff --git a/clang/test/Headers/nvptx_device_math_complex.cpp b/clang/test/Headers/nvptx_device_math_complex.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_complex.cpp @@ -0,0 +1,25 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s +// expected-no-diagnostics + +#include + +// CHECK-DAG: define {{.*}} @__mulsc3 +// CHECK-DAG: define {{.*}} @__muldc3 +// CHECK-DAG: define {{.*}} @__divsc3 +// CHECK-DAG: define {{.*}} @__divdc3 + +void test_scmplx(std::complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +} + +void test_dcmplx(std::complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +}