diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -140,10 +140,8 @@ ) set(openmp_wrapper_files - openmp_wrappers/math.h - openmp_wrappers/cmath - openmp_wrappers/__clang_openmp_math.h openmp_wrappers/__clang_openmp_math_declares.h + openmp_wrappers/cmath openmp_wrappers/new ) diff --git a/clang/lib/Headers/__clang_cuda_cmath.h b/clang/lib/Headers/__clang_cuda_cmath.h --- a/clang/lib/Headers/__clang_cuda_cmath.h +++ b/clang/lib/Headers/__clang_cuda_cmath.h @@ -45,17 +45,10 @@ #define __NOEXCEPT #endif -#if !(defined(_OPENMP) && defined(__cplusplus)) __DEVICE__ long long abs(long long __n) { return ::llabs(__n); } __DEVICE__ long abs(long __n) { return ::labs(__n); } __DEVICE__ float abs(float __x) { return ::fabsf(__x); } __DEVICE__ double abs(double __x) { return ::fabs(__x); } -#endif -// TODO: remove once variat is supported. -#if defined(_OPENMP) && defined(__cplusplus) -__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); } -__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); } -#endif __DEVICE__ float acos(float __x) { return ::acosf(__x); } __DEVICE__ float asin(float __x) { return ::asinf(__x); } __DEVICE__ float atan(float __x) { return ::atanf(__x); } @@ -67,8 +60,6 @@ __DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); } __DEVICE__ float floor(float __x) { return ::floorf(__x); } __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } -// TODO: remove when variant is supported -#ifndef _OPENMP __DEVICE__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); @@ -77,7 +68,6 @@ return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } -#endif __DEVICE__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } @@ -322,6 +312,7 @@ return std::scalbn((double)__x, 
__exp); } +#ifndef _OPENMP // We need to define these overloads in exactly the namespace our standard // library uses (including the right inline namespace), otherwise they won't be // picked up by other functions in the standard library (e.g. functions in @@ -457,10 +448,7 @@ using ::remquof; using ::rintf; using ::roundf; -// TODO: remove once variant is supported -#ifndef _OPENMP using ::scalblnf; -#endif using ::scalbnf; using ::sinf; using ::sinhf; @@ -478,6 +466,7 @@ #endif } // namespace std #endif +#endif #undef __NOEXCEPT #undef __DEVICE__ diff --git a/clang/lib/Headers/__clang_cuda_device_functions.h b/clang/lib/Headers/__clang_cuda_device_functions.h --- a/clang/lib/Headers/__clang_cuda_device_functions.h +++ b/clang/lib/Headers/__clang_cuda_device_functions.h @@ -1503,10 +1503,8 @@ __DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); } __DEVICE__ double ceil(double __a) { return __nv_ceil(__a); } __DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); } -#ifndef _OPENMP __DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); } __DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); } -#endif __DEVICE__ double copysign(double __a, double __b) { return __nv_copysign(__a, __b); } @@ -1719,8 +1717,6 @@ __DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); } __DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); } __DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); } -// TODO: remove once variant is supported -#ifndef _OPENMP __DEVICE__ double scalbln(double __a, long __b) { if (__b > INT_MAX) return __a > 0 ? HUGE_VAL : -HUGE_VAL; @@ -1735,7 +1731,6 @@ return __a > 0 ? 
0.f : -0.f; return scalbnf(__a, (int)__b); } -#endif __DEVICE__ double sin(double __a) { return __nv_sin(__a); } __DEVICE__ void sincos(double __a, double *__s, double *__c) { return __nv_sincos(__a, __s, __c); diff --git a/clang/lib/Headers/__clang_cuda_math_forward_declares.h b/clang/lib/Headers/__clang_cuda_math_forward_declares.h --- a/clang/lib/Headers/__clang_cuda_math_forward_declares.h +++ b/clang/lib/Headers/__clang_cuda_math_forward_declares.h @@ -20,37 +20,14 @@ // would preclude the use of our own __device__ overloads for these functions. #pragma push_macro("__DEVICE__") -#ifdef _OPENMP -#define __DEVICE__ static __inline__ __attribute__((always_inline)) -#else #define __DEVICE__ \ static __inline__ __attribute__((always_inline)) __attribute__((device)) -#endif - -// For C++ 17 we need to include noexcept attribute to be compatible -// with the header-defined version. This may be removed once -// variant is supported. -#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L -#define __NOEXCEPT noexcept -#else -#define __NOEXCEPT -#endif -#if !(defined(_OPENMP) && defined(__cplusplus)) __DEVICE__ long abs(long); __DEVICE__ long long abs(long long); __DEVICE__ double abs(double); __DEVICE__ float abs(float); -#endif -// While providing the CUDA declarations and definitions for math functions, -// we may manually define additional functions. -// TODO: Once variant is supported the additional functions will have -// to be removed. 
-#if defined(_OPENMP) && defined(__cplusplus) -__DEVICE__ const double abs(const double); -__DEVICE__ const float abs(const float); -#endif -__DEVICE__ int abs(int) __NOEXCEPT; +__DEVICE__ int abs(int); __DEVICE__ double acos(double); __DEVICE__ float acos(float); __DEVICE__ double acosh(double); @@ -85,8 +62,8 @@ __DEVICE__ float exp(float); __DEVICE__ double expm1(double); __DEVICE__ float expm1(float); -__DEVICE__ double fabs(double) __NOEXCEPT; -__DEVICE__ float fabs(float) __NOEXCEPT; +__DEVICE__ double fabs(double); +__DEVICE__ float fabs(float); __DEVICE__ double fdim(double, double); __DEVICE__ float fdim(float, float); __DEVICE__ double floor(double); @@ -136,12 +113,12 @@ __DEVICE__ bool isnormal(float); __DEVICE__ bool isunordered(double, double); __DEVICE__ bool isunordered(float, float); -__DEVICE__ long labs(long) __NOEXCEPT; +__DEVICE__ long labs(long); __DEVICE__ double ldexp(double, int); __DEVICE__ float ldexp(float, int); __DEVICE__ double lgamma(double); __DEVICE__ float lgamma(float); -__DEVICE__ long long llabs(long long) __NOEXCEPT; +__DEVICE__ long long llabs(long long); __DEVICE__ long long llrint(double); __DEVICE__ long long llrint(float); __DEVICE__ double log10(double); @@ -152,9 +129,6 @@ __DEVICE__ float log2(float); __DEVICE__ double logb(double); __DEVICE__ float logb(float); -#if defined(_OPENMP) && defined(__cplusplus) -__DEVICE__ long double log(long double); -#endif __DEVICE__ double log(double); __DEVICE__ float log(float); __DEVICE__ long lrint(double); @@ -302,7 +276,6 @@ } // namespace std #endif -#undef __NOEXCEPT #pragma pop_macro("__DEVICE__") #endif diff --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h deleted file mode 100644 --- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h +++ /dev/null @@ -1,35 +0,0 @@ -/*===---- __clang_openmp_math.h - OpenMP target math support ---------------=== - * - * Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===-----------------------------------------------------------------------=== - */ - -#if defined(__NVPTX__) && defined(_OPENMP) -/// TODO: -/// We are currently reusing the functionality of the Clang-CUDA code path -/// as an alternative to the host declarations provided by math.h and cmath. -/// This is suboptimal. -/// -/// We should instead declare the device functions in a similar way, e.g., -/// through OpenMP 5.0 variants, and afterwards populate the module with the -/// host declarations by unconditionally including the host math.h or cmath, -/// respectively. This is actually what the Clang-CUDA code path does, using -/// __device__ instead of variants to avoid redeclarations and get the desired -/// overload resolution. - -#define __CUDA__ - -#if defined(__cplusplus) - #include <__clang_cuda_cmath.h> -#endif - -#undef __CUDA__ - -/// Magic macro for stopping the math.h/cmath host header from being included. -#define __CLANG_NO_HOST_MATH__ - -#endif - diff --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h --- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h +++ b/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h @@ -14,20 +14,56 @@ #error "This file is for OpenMP compilation only." #endif -#if defined(__NVPTX__) && defined(_OPENMP) +/** + * A positive float constant expression. HUGE_VALF evaluates + * to +infinity. Used as an error value returned by the built-in + * math functions. + */ +#define HUGE_VALF (__builtin_huge_valf()) -#define __CUDA__ +/** + * A positive double constant expression. HUGE_VAL evaluates + * to +infinity. Used as an error value returned by the built-in + * math functions. 
+ */ +#define HUGE_VAL (__builtin_huge_val()) #if defined(__cplusplus) - #include <__clang_cuda_math_forward_declares.h> + #include + #include +#else + #include #endif +#pragma omp begin declare variant match(device={arch(nvptx64)}) +#define __CUDA__ + /// Include declarations for libdevice functions. #include <__clang_cuda_libdevice_declares.h> + /// Provide definitions for these functions. #include <__clang_cuda_device_functions.h> #undef __CUDA__ +// TODO: Hack until we support an extension to the match clause that allows "or". +#undef __CLANG_CUDA_LIBDEVICE_DECLARES_H__ +#undef __CLANG_CUDA_DEVICE_FUNCTIONS_H__ +#pragma omp end declare variant + +#pragma omp begin declare variant match(device={arch(nvptx)}) +#define __CUDA__ + +/// Include declarations for libdevice functions. +#include <__clang_cuda_libdevice_declares.h> + +/// Provide definitions for these functions. +#include <__clang_cuda_device_functions.h> + +#undef __CUDA__ +#pragma omp end declare variant + + +#undef HUGE_VAL +#undef HUGE_VALF -#endif #endif diff --git a/clang/lib/Headers/openmp_wrappers/cmath b/clang/lib/Headers/openmp_wrappers/cmath --- a/clang/lib/Headers/openmp_wrappers/cmath +++ b/clang/lib/Headers/openmp_wrappers/cmath @@ -1,4 +1,4 @@ -/*===-------------- cmath - Alternative cmath header -----------------------=== +/*===---- __clang_openmp_math_declares.h - OpenMP math declares ------ c++ -=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. @@ -7,10 +7,33 @@ *===-----------------------------------------------------------------------=== */ -#include <__clang_openmp_math.h> +#ifndef __CLANG_OPENMP_CMATH_H__ +#define __CLANG_OPENMP_CMATH_H__ -#ifndef __CLANG_NO_HOST_MATH__ +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." 
+#endif + +#include_next #include_next -#else -#undef __CLANG_NO_HOST_MATH__ + +#pragma omp begin declare variant match(device={arch(nvptx64)}) +#define __CUDA__ + +#include <__clang_cuda_cmath.h> + +// TODO: Hack until we support an extension to the match clause that allows "or". +#undef __CLANG_CUDA_CMATH_H__ + +#undef __CUDA__ +#pragma omp end declare variant + +#pragma omp begin declare variant match(device={arch(nvptx)}) +#define __CUDA__ + +#include <__clang_cuda_cmath.h> + +#undef __CUDA__ +#pragma omp end declare variant + #endif diff --git a/clang/lib/Headers/openmp_wrappers/math.h b/clang/lib/Headers/openmp_wrappers/math.h deleted file mode 100644 --- a/clang/lib/Headers/openmp_wrappers/math.h +++ /dev/null @@ -1,17 +0,0 @@ -/*===------------- math.h - Alternative math.h header ----------------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===-----------------------------------------------------------------------=== - */ - -#include <__clang_openmp_math.h> - -#ifndef __CLANG_NO_HOST_MATH__ -#include_next -#else -#undef __CLANG_NO_HOST_MATH__ -#endif - diff --git a/clang/test/Headers/Inputs/include/climits b/clang/test/Headers/Inputs/include/climits new file mode 100644 --- /dev/null +++ b/clang/test/Headers/Inputs/include/climits @@ -0,0 +1,4 @@ +#pragma once + +#define INT_MIN -2147483648 +#define INT_MAX 2147483647 diff --git a/clang/test/Headers/Inputs/include/cmath b/clang/test/Headers/Inputs/include/cmath --- a/clang/test/Headers/Inputs/include/cmath +++ b/clang/test/Headers/Inputs/include/cmath @@ -1,5 +1,220 @@ #pragma once -double sqrt(double); -double pow(double, double); -double modf(double, double*); +static double acos(double); +static float acos(float); +static double acosh(double); +static float acosh(float); +static double asin(double); +static float asin(float); 
+static double asinh(double); +static float asinh(float); +static double atan2(double, double); +static float atan2(float, float); +static double atan(double); +static float atan(float); +static double atanh(double); +static float atanh(float); +static double cbrt(double); +static float cbrt(float); +static double ceil(double); +static float ceil(float); +static double copysign(double, double); +static float copysign(float, float); +static double cos(double); +static float cos(float); +static double cosh(double); +static float cosh(float); +static double erfc(double); +static float erfc(float); +static double erf(double); +static float erf(float); +static double exp2(double); +static float exp2(float); +static double exp(double); +static float exp(float); +static double expm1(double); +static float expm1(float); +static double fdim(double, double); +static float fdim(float, float); +static double floor(double); +static float floor(float); +static double fma(double, double, double); +static float fma(float, float, float); +static double fmax(double, double); +static float fmax(float, float); +static double fmin(double, double); +static float fmin(float, float); +static double fmod(double, double); +static float fmod(float, float); +static int fpclassify(double); +static int fpclassify(float); +static double frexp(double, int *); +static float frexp(float, int *); +static double hypot(double, double); +static float hypot(float, float); +static int ilogb(double); +static int ilogb(float); +static bool isfinite(long double); +static bool isfinite(double); +static bool isfinite(float); +static bool isgreater(double, double); +static bool isgreaterequal(double, double); +static bool isgreaterequal(float, float); +static bool isgreater(float, float); +static bool isinf(long double); +static bool isinf(double); +static bool isinf(float); +static bool isless(double, double); +static bool islessequal(double, double); +static bool islessequal(float, float); +static bool 
isless(float, float); +static bool islessgreater(double, double); +static bool islessgreater(float, float); +static bool isnan(long double); +static bool isnan(double); +static bool isnan(float); +static bool isnormal(double); +static bool isnormal(float); +static bool isunordered(double, double); +static bool isunordered(float, float); +static double ldexp(double, int); +static float ldexp(float, int); +static double lgamma(double); +static float lgamma(float); +static long long llrint(double); +static long long llrint(float); +static double log10(double); +static float log10(float); +static double log1p(double); +static float log1p(float); +static double log2(double); +static float log2(float); +static double logb(double); +static float logb(float); +static double log(double); +static float log(float); +static long lrint(double); +static long lrint(float); +static long lround(double); +static long lround(float); +static long long llround(float); // No llround(double). +static double modf(double, double *); +static float modf(float, float *); +static double nan(const char *); +static float nanf(const char *); +static double nearbyint(double); +static float nearbyint(float); +static double nextafter(double, double); +static float nextafter(float, float); +static double pow(double, double); +static double pow(double, int); +static float pow(float, float); +static float pow(float, int); +static double remainder(double, double); +static float remainder(float, float); +static double remquo(double, double, int *); +static float remquo(float, float, int *); +static double rint(double); +static float rint(float); +static double round(double); +static float round(float); +static double scalbln(double, long); +static float scalbln(float, long); +static double scalbn(double, int); +static float scalbn(float, int); +static bool signbit(double); +static bool signbit(float); +static long double sin(long double); +static double sin(double); +static float sin(float); +static 
double sinh(double); +static float sinh(float); +static double sqrt(double); +static float sqrt(float); +static double tan(double); +static float tan(float); +static double tanh(double); +static float tanh(float); +static double tgamma(double); +static float tgamma(float); +static double trunc(double); +static float trunc(float); + +namespace std { + +using ::acos; +using ::acosh; +using ::asin; +using ::asinh; +using ::atan; +using ::atan2; +using ::atanh; +using ::cbrt; +using ::ceil; +using ::copysign; +using ::cos; +using ::cosh; +using ::erf; +using ::erfc; +using ::exp; +using ::exp2; +using ::expm1; +using ::fdim; +using ::floor; +using ::fma; +using ::fmax; +using ::fmin; +using ::fmod; +using ::fpclassify; +using ::frexp; +using ::hypot; +using ::ilogb; +using ::isfinite; +using ::isgreater; +using ::isgreaterequal; +using ::isinf; +using ::isless; +using ::islessequal; +using ::islessgreater; +using ::isnan; +using ::isnormal; +using ::isunordered; +using ::ldexp; +using ::lgamma; +using ::llrint; +using ::log; +using ::log10; +using ::log1p; +using ::log2; +using ::logb; +using ::lrint; +using ::lround; +using ::llround; +using ::modf; +using ::nan; +using ::nanf; +using ::nearbyint; +using ::nextafter; +using ::pow; +using ::remainder; +using ::remquo; +using ::rint; +using ::round; +using ::scalbln; +using ::scalbn; +using ::signbit; +using ::sin; +using ::sinh; +using ::sqrt; +using ::tan; +using ::tanh; +using ::tgamma; +using ::trunc; + +} // namespace std + +#define FP_NAN 0 +#define FP_INFINITE 1 +#define FP_ZERO 2 +#define FP_SUBNORMAL 3 +#define FP_NORMAL 4 diff --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib --- a/clang/test/Headers/Inputs/include/cstdlib +++ b/clang/test/Headers/Inputs/include/cstdlib @@ -1,23 +1,17 @@ #pragma once #if __cplusplus >= 201703L -extern int abs (int __x) throw() __attribute__ ((__const__)) ; -extern long int labs (long int __x) throw() __attribute__ ((__const__)) ; 
-extern float fabs (float __x) throw() __attribute__ ((__const__)) ; +static int abs(int __x) throw() __attribute__((__const__)) { return __builtin_abs(__x); } +static long int labs(long int __x) throw() __attribute__((__const__)) { return __builtin_labs(__x); } +static float fabs(float __x) throw() __attribute__((__const__)) { return __builtin_fabs(__x); } #else -extern int abs (int __x) __attribute__ ((__const__)) ; -extern long int labs (long int __x) __attribute__ ((__const__)) ; -extern float fabs (float __x) __attribute__ ((__const__)) ; +static int abs(int __x) __attribute__((__const__)) { return __builtin_abs(__x); } +static long int labs(long int __x) __attribute__((__const__)) { return __builtin_labs(__x); } +static float fabs(float __x) __attribute__((__const__)) { return __builtin_fabs(__x); } #endif -namespace std -{ +namespace std { using ::abs; -inline long -abs(long __i) { return __builtin_labs(__i); } - -inline long long -abs(long long __x) { return __builtin_llabs (__x); } } diff --git a/clang/test/Headers/Inputs/include/math.h b/clang/test/Headers/Inputs/include/math.h --- a/clang/test/Headers/Inputs/include/math.h +++ b/clang/test/Headers/Inputs/include/math.h @@ -1,5 +1,182 @@ #pragma once -double sqrt(double); -double pow(double, double); -double modf(double, double*); +static double fabs(double __a); +static double acos(double __a); +static float acosf(float __a); +static double acosh(double __a); +static float acoshf(float __a); +static double asin(double __a); +static float asinf(float __a); +static double asinh(double __a); +static float asinhf(float __a); +static double atan(double __a); +static double atan2(double __a, double __b); +static float atan2f(float __a, float __b); +static float atanf(float __a); +static double atanh(double __a); +static float atanhf(float __a); +static double cbrt(double __a); +static float cbrtf(float __a); +static double ceil(double __a); +static float ceilf(float __a); +static int clock(); +static long long 
clock64(); +static double copysign(double __a, double __b); +static float copysignf(float __a, float __b); +static double cos(double __a); +static float cosf(float __a); +static double cosh(double __a); +static float coshf(float __a); +static double cospi(double __a); +static float cospif(float __a); +static double cyl_bessel_i0(double __a); +static float cyl_bessel_i0f(float __a); +static double cyl_bessel_i1(double __a); +static float cyl_bessel_i1f(float __a); +static double erf(double __a); +static double erfc(double __a); +static float erfcf(float __a); +static double erfcinv(double __a); +static float erfcinvf(float __a); +static double erfcx(double __a); +static float erfcxf(float __a); +static float erff(float __a); +static double erfinv(double __a); +static float erfinvf(float __a); +static double exp(double __a); +static double exp10(double __a); +static float exp10f(float __a); +static double exp2(double __a); +static float exp2f(float __a); +static float expf(float __a); +static double expm1(double __a); +static float expm1f(float __a); +static float fabsf(float __a); +static double fdim(double __a, double __b); +static float fdimf(float __a, float __b); +static double fdivide(double __a, double __b); +static float fdividef(float __a, float __b); +static double floor(double __f); +static float floorf(float __f); +static double fma(double __a, double __b, double __c); +static float fmaf(float __a, float __b, float __c); +static double fmax(double __a, double __b); +static float fmaxf(float __a, float __b); +static double fmin(double __a, double __b); +static float fminf(float __a, float __b); +static double fmod(double __a, double __b); +static float fmodf(float __a, float __b); +static double frexp(double __a, int *__b); +static float frexpf(float __a, int *__b); +static double hypot(double __a, double __b); +static float hypotf(float __a, float __b); +static int ilogb(double __a); +static int ilogbf(float __a); +static double j0(double __a); +static 
float j0f(float __a); +static double j1(double __a); +static float j1f(float __a); +static double jn(int __n, double __a); +static float jnf(int __n, float __a); +static double ldexp(double __a, int __b); +static float ldexpf(float __a, int __b); +static double lgamma(double __a); +static float lgammaf(float __a); +static long long llmax(long long __a, long long __b); +static long long llmin(long long __a, long long __b); +static long long llrint(double __a); +static long long llrintf(float __a); +static long long llround(double __a); +static long long llroundf(float __a); +static double log(double __a); +static double log10(double __a); +static float log10f(float __a); +static double log1p(double __a); +static float log1pf(float __a); +static double log2(double __a); +static float log2f(float __a); +static double logb(double __a); +static float logbf(float __a); +static float logf(float __a); +static long lrint(double __a); +static long lrintf(float __a); +static long lround(double __a); +static long lroundf(float __a); +static int max(int __a, int __b); +static int min(int __a, int __b); +static double modf(double __a, double *__b); +static float modff(float __a, float *__b); +static double nearbyint(double __a); +static float nearbyintf(float __a); +static double nextafter(double __a, double __b); +static float nextafterf(float __a, float __b); +static double norm(int __dim, const double *__t); +static double norm3d(double __a, double __b, double __c); +static float norm3df(float __a, float __b, float __c); +static double norm4d(double __a, double __b, double __c, double __d); +static float norm4df(float __a, float __b, float __c, float __d); +static double normcdf(double __a); +static float normcdff(float __a); +static double normcdfinv(double __a); +static float normcdfinvf(float __a); +static float normf(int __dim, const float *__t); +static double pow(double __a, double __b); +static float powf(float __a, float __b); +static double powi(double __a, int __b); 
+static float powif(float __a, int __b); +static double rcbrt(double __a); +static float rcbrtf(float __a); +static double remainder(double __a, double __b); +static float remainderf(float __a, float __b); +static double remquo(double __a, double __b, int *__c); +static float remquof(float __a, float __b, int *__c); +static double rhypot(double __a, double __b); +static float rhypotf(float __a, float __b); +static double rint(double __a); +static float rintf(float __a); +static double rnorm(int __a, const double *__b); +static double rnorm3d(double __a, double __b, double __c); +static float rnorm3df(float __a, float __b, float __c); +static double rnorm4d(double __a, double __b, double __c, double __d); +static float rnorm4df(float __a, float __b, float __c, float __d); +static float rnormf(int __dim, const float *__t); +static double round(double __a); +static float roundf(float __a); +static double rsqrt(double __a); +static float rsqrtf(float __a); +static double scalbn(double __a, int __b); +static float scalbnf(float __a, int __b); +static double scalbln(double __a, long __b); +static float scalblnf(float __a, long __b); +static double sin(double __a); +static void sincos(double __a, double *__s, double *__c); +static void sincosf(float __a, float *__s, float *__c); +static void sincospi(double __a, double *__s, double *__c); +static void sincospif(float __a, float *__s, float *__c); +static float sinf(float __a); +static double sinh(double __a); +static float sinhf(float __a); +static double sinpi(double __a); +static float sinpif(float __a); +static double sqrt(double __a); +static float sqrtf(float __a); +static double tan(double __a); +static float tanf(float __a); +static double tanh(double __a); +static float tanhf(float __a); +static double tgamma(double __a); +static float tgammaf(float __a); +static double trunc(double __a); +static float truncf(float __a); +static unsigned long long ullmax(unsigned long long __a, + unsigned long long __b); +static 
unsigned long long ullmin(unsigned long long __a, + unsigned long long __b); +static unsigned int umax(unsigned int __a, unsigned int __b); +static unsigned int umin(unsigned int __a, unsigned int __b); +static double y0(double __a); +static float y0f(float __a); +static double y1(double __a); +static float y1f(float __a); +static double yn(int __a, double __b); +static float ynf(int __a, float __b); diff --git a/clang/test/Headers/Inputs/include/stdlib.h b/clang/test/Headers/Inputs/include/stdlib.h --- a/clang/test/Headers/Inputs/include/stdlib.h +++ b/clang/test/Headers/Inputs/include/stdlib.h @@ -1,2 +1,4 @@ #pragma once typedef __SIZE_TYPE__ size_t; + +static int abs(int __a); diff --git a/clang/test/Headers/nvptx_device_cmath_functions.c b/clang/test/Headers/nvptx_device_cmath_functions.c deleted file mode 100644 --- a/clang/test/Headers/nvptx_device_cmath_functions.c +++ /dev/null @@ -1,25 +0,0 @@ -// Test calling of device math functions. -///==========================================================================/// - -// REQUIRES: nvptx-registered-target - -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include cmath -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include cmath -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s - -#include - -void test_sqrt(double a1) { - #pragma omp target - { - // CHECK-YES: call double @__nv_sqrt(double - double l1 = sqrt(a1); - // CHECK-YES: call double @__nv_pow(double - double l2 = pow(a1, a1); - // CHECK-YES: call double @__nv_modf(double - double l3 = modf(a1 + 3.5, &a1); - // CHECK-YES: 
call double @__nv_fabs(double - double l4 = fabs(a1); - // CHECK-YES: call i32 @__nv_abs(i32 - double l5 = abs((int)a1); - } -} diff --git a/clang/test/Headers/nvptx_device_cmath_functions.cpp b/clang/test/Headers/nvptx_device_cmath_functions.cpp --- a/clang/test/Headers/nvptx_device_cmath_functions.cpp +++ b/clang/test/Headers/nvptx_device_cmath_functions.cpp @@ -3,8 +3,8 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include cmath -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include cmath -internal-isystem %S/Inputs/include -include stdlib.h -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s #include #include diff --git a/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp b/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp --- a/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp +++ b/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp @@ -3,8 +3,8 @@ // REQUIRES: nvptx-registered-target -// RUN: 
%clang_cc1 -internal-isystem %S/Inputs/include -include cmath -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include cmath -internal-isystem %S/Inputs/include -include stdlib.h -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s #include #include diff --git a/clang/test/Headers/nvptx_device_math_complex.c b/clang/test/Headers/nvptx_device_math_complex.c new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_complex.c @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +// CHECK-DAG: call { float, float } 
@__divsc3( +// CHECK-DAG: call { float, float } @__mulsc3( +void test_scmplx(float _Complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +} + + +// CHECK-DAG: call { double, double } @__divdc3( +// CHECK-DAG: call { double, double } @__muldc3( +void test_dcmplx(double _Complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +} diff --git a/clang/test/Headers/nvptx_device_math_functions.c b/clang/test/Headers/nvptx_device_math_functions.c --- a/clang/test/Headers/nvptx_device_math_functions.c +++ b/clang/test/Headers/nvptx_device_math_functions.c @@ -3,23 +3,26 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include math.h -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include math.h -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ 
-internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +#include #include void test_sqrt(double a1) { #pragma omp target { - // CHECK-YES: call double @__nv_sqrt(double + // CHECK: call double @__nv_sqrt(double double l1 = sqrt(a1); - // CHECK-YES: call double @__nv_pow(double + // CHECK: call double @__nv_pow(double double l2 = pow(a1, a1); - // CHECK-YES: call double @__nv_modf(double + // CHECK: call double @__nv_modf(double double l3 = modf(a1 + 3.5, &a1); - // CHECK-YES: call double @__nv_fabs(double + // CHECK: call double @__nv_fabs(double double l4 = fabs(a1); - // CHECK-YES: call i32 @__nv_abs(i32 + // CHECK: call i32 @__nv_abs(i32 double l5 = abs((int)a1); } } diff --git a/clang/test/Headers/nvptx_device_math_functions.cpp b/clang/test/Headers/nvptx_device_math_functions.cpp --- a/clang/test/Headers/nvptx_device_math_functions.cpp +++ b/clang/test/Headers/nvptx_device_math_functions.cpp @@ -3,11 +3,11 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include math.h -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include math.h -internal-isystem %S/Inputs/include -include stdlib.h -include limits -include cstdlib -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem 
%S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s #include -#include +#include void test_sqrt(double a1) { #pragma omp target diff --git a/clang/test/Headers/nvptx_device_math_functions_cxx17.cpp b/clang/test/Headers/nvptx_device_math_functions_cxx17.cpp --- a/clang/test/Headers/nvptx_device_math_functions_cxx17.cpp +++ b/clang/test/Headers/nvptx_device_math_functions_cxx17.cpp @@ -3,11 +3,11 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include math.h -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include math.h -internal-isystem %S/Inputs/include -include stdlib.h -include limits -include cstdlib -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include 
__clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s #include -#include +#include void test_sqrt(double a1) { #pragma omp target diff --git a/clang/test/Headers/nvptx_device_math_macro.cpp b/clang/test/Headers/nvptx_device_math_macro.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_macro.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +#include <cmath> + +#pragma omp declare target +int use_macro() { + double a(0); +// CHECK: %call = call i32 @_ZL10fpclassifyd(double %0) +// CHECK: %cmp = icmp ne i32 %call, 2 + return (std::fpclassify(a) != FP_ZERO); +} +#pragma omp end declare target diff --git a/clang/test/Headers/nvptx_device_math_sin.c b/clang/test/Headers/nvptx_device_math_sin.c new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_sin.c @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers
-internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=SLOW +// RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math +// RUN: %clang_cc1 -x c -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math | FileCheck %s --check-prefix=FAST +// expected-no-diagnostics + +#include <math.h> + +double math(float f, double d, long double ld) { + double r = 0; +// SLOW: call float @__nv_sinf(float +// FAST: call fast float @__nv_fast_sinf(float + r += sinf(f); +// SLOW: call double @__nv_sin(double +// FAST: call fast double @__nv_sin(double + r += sin(d); + return r; +} + +long double foo(float f, double d, long double ld) { + double r = ld; + r += math(f, d, ld); +#pragma omp target map(r) + { r += math(f, d, ld); } + return r; +} diff --git a/clang/test/Headers/nvptx_device_math_sin.cpp b/clang/test/Headers/nvptx_device_math_sin.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_sin.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda
-emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=SLOW +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math +// RUN: %clang_cc1 -x c++ -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math | FileCheck %s --check-prefix=FAST +// expected-no-diagnostics + +#include <cmath> + +double math(float f, double d, long double ld) { + double r = 0; +// SLOW: call float @__nv_sinf(float +// FAST: call fast float @__nv_fast_sinf(float + r += sin(f); +// SLOW: call double @__nv_sin(double +// FAST: call fast double @__nv_sin(double + r += sin(d); + return r; +} + +long double foo(float f, double d, long double ld) { + double r = ld; + r += math(f, d, ld); +#pragma omp target map(r) + { r += math(f, d, ld); } + return r; +}