diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1216,7 +1216,7 @@ } CmdArgs.push_back("-include"); - CmdArgs.push_back("__clang_openmp_math_declares.h"); + CmdArgs.push_back("__clang_openmp_device_functions.h"); } // Add -i* options, and automatically translate to diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -145,8 +145,7 @@ set(openmp_wrapper_files openmp_wrappers/math.h openmp_wrappers/cmath - openmp_wrappers/__clang_openmp_math.h - openmp_wrappers/__clang_openmp_math_declares.h + openmp_wrappers/__clang_openmp_device_functions.h openmp_wrappers/new ) diff --git a/clang/lib/Headers/__clang_cuda_cmath.h b/clang/lib/Headers/__clang_cuda_cmath.h --- a/clang/lib/Headers/__clang_cuda_cmath.h +++ b/clang/lib/Headers/__clang_cuda_cmath.h @@ -12,7 +12,9 @@ #error "This file is for CUDA compilation only." #endif +#ifndef _OPENMP #include +#endif // CUDA lets us use various std math functions on the device side. This file // works in concert with __clang_cuda_math_forward_declares.h to make this work. @@ -31,31 +33,15 @@ // std covers all of the known knowns. #ifdef _OPENMP -#define __DEVICE__ static __attribute__((always_inline)) +#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline)) #endif -// For C++ 17 we need to include noexcept attribute to be compatible -// with the header-defined version. This may be removed once -// variant is supported. -#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L -#define __NOEXCEPT noexcept -#else -#define __NOEXCEPT -#endif - -#if !(defined(_OPENMP) && defined(__cplusplus)) __DEVICE__ long long abs(long long __n) { return ::llabs(__n); } __DEVICE__ long abs(long __n) { return ::labs(__n); } __DEVICE__ float abs(float __x) { return ::fabsf(__x); } __DEVICE__ double abs(double __x) { return ::fabs(__x); } -#endif -// TODO: remove once variat is supported. -#if defined(_OPENMP) && defined(__cplusplus) -__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); } -__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); } -#endif __DEVICE__ float acos(float __x) { return ::acosf(__x); } __DEVICE__ float asin(float __x) { return ::asinf(__x); } __DEVICE__ float atan(float __x) { return ::atanf(__x); } @@ -64,11 +50,9 @@ __DEVICE__ float cos(float __x) { return ::cosf(__x); } __DEVICE__ float cosh(float __x) { return ::coshf(__x); } __DEVICE__ float exp(float __x) { return ::expf(__x); } -__DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); } +__DEVICE__ float fabs(float __x) { return ::fabsf(__x); } __DEVICE__ float floor(float __x) { return ::floorf(__x); } __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } -// TODO: remove when variant is supported -#ifndef _OPENMP __DEVICE__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); @@ -77,7 +61,6 @@ return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } -#endif __DEVICE__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } @@ -161,6 +144,8 @@ // libdevice doesn't provide an implementation, and we don't want to be in the // business of implementing tricky libm functions in this header. +#ifndef _OPENMP + // Now we've defined everything we promised we'd define in // __clang_cuda_math_forward_declares.h. We need to do two additional things to // fix up our math functions. @@ -457,10 +442,7 @@ using ::remquof; using ::rintf; using ::roundf; -// TODO: remove once variant is supported -#ifndef _OPENMP using ::scalblnf; -#endif using ::scalbnf; using ::sinf; using ::sinhf; @@ -479,7 +461,8 @@ } // namespace std #endif -#undef __NOEXCEPT +#endif // _OPENMP + #undef __DEVICE__ #endif diff --git a/clang/lib/Headers/__clang_cuda_device_functions.h b/clang/lib/Headers/__clang_cuda_device_functions.h --- a/clang/lib/Headers/__clang_cuda_device_functions.h +++ b/clang/lib/Headers/__clang_cuda_device_functions.h @@ -21,7 +21,7 @@ // functions and __forceinline__ helps inlining these wrappers at -O1. #pragma push_macro("__DEVICE__") #ifdef _OPENMP -#define __DEVICE__ static __attribute__((always_inline)) +#define __DEVICE__ static __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ __forceinline__ #endif diff --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h --- a/clang/lib/Headers/__clang_cuda_math.h +++ b/clang/lib/Headers/__clang_cuda_math.h @@ -23,11 +23,29 @@ // functions and __forceinline__ helps inlining these wrappers at -O1. #pragma push_macro("__DEVICE__") #ifdef _OPENMP -#define __DEVICE__ static __inline__ __attribute__((always_inline)) +#if defined(__cplusplus) +#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) +#else +#define __DEVICE__ static __attribute__((always_inline, nothrow)) +#endif #else #define __DEVICE__ static __device__ __forceinline__ #endif +// Specialized version of __DEVICE__ for functions with void return type. Needed +// because the OpenMP overlay requires constexpr functions here but prior to +// c++14 void return functions could not be constexpr. +#pragma push_macro("__DEVICE_VOID__") +#ifdef _OPENMP +#if defined(__cplusplus) && __cplusplus >= 201402L +#define __DEVICE_VOID__ static constexpr __attribute__((always_inline, nothrow)) +#else +#define __DEVICE_VOID__ static __attribute__((always_inline, nothrow)) +#endif +#else +#define __DEVICE_VOID__ __DEVICE__ +#endif + // libdevice provides fast low precision and slow full-recision implementations // for some functions. Which one gets selected depends on // __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if @@ -39,17 +57,8 @@ #define __FAST_OR_SLOW(fast, slow) slow #endif -// For C++ 17 we need to include noexcept attribute to be compatible -// with the header-defined version. This may be removed once -// variant is supported. -#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L -#define __NOEXCEPT noexcept -#else -#define __NOEXCEPT -#endif - -__DEVICE__ int abs(int __a) __NOEXCEPT { return __nv_abs(__a); } -__DEVICE__ double fabs(double __a) __NOEXCEPT { return __nv_fabs(__a); } +__DEVICE__ int abs(int __a) { return __nv_abs(__a); } +__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); } __DEVICE__ double acos(double __a) { return __nv_acos(__a); } __DEVICE__ float acosf(float __a) { return __nv_acosf(__a); } __DEVICE__ double acosh(double __a) { return __nv_acosh(__a); } @@ -104,7 +113,7 @@ __DEVICE__ float expf(float __a) { return __nv_expf(__a); } __DEVICE__ double expm1(double __a) { return __nv_expm1(__a); } __DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); } -__DEVICE__ float fabsf(float __a) __NOEXCEPT { return __nv_fabsf(__a); } +__DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); } __DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); } __DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); } __DEVICE__ double fdivide(double __a, double __b) { return __a / __b; } @@ -142,15 +151,15 @@ __DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); } __DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); } #if defined(__LP64__) || defined(_WIN64) -__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_llabs(__a); }; +__DEVICE__ long labs(long __a) { return __nv_llabs(__a); }; #else -__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_abs(__a); }; +__DEVICE__ long labs(long __a) { return __nv_abs(__a); }; #endif __DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); } __DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); } __DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); } __DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); } -__DEVICE__ long long llabs(long long __a) __NOEXCEPT { return __nv_llabs(__a); } +__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); } __DEVICE__ long long llmax(long long __a, long long __b) { return __nv_llmax(__a, __b); } @@ -270,8 +279,6 @@ __DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); } __DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); } __DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); } -// TODO: remove once variant is supported -#ifndef _OPENMP __DEVICE__ double scalbln(double __a, long __b) { if (__b > INT_MAX) return __a > 0 ? HUGE_VAL : -HUGE_VAL; @@ -286,18 +293,17 @@ return __a > 0 ? 0.f : -0.f; return scalbnf(__a, (int)__b); } -#endif __DEVICE__ double sin(double __a) { return __nv_sin(__a); } -__DEVICE__ void sincos(double __a, double *__s, double *__c) { +__DEVICE_VOID__ void sincos(double __a, double *__s, double *__c) { return __nv_sincos(__a, __s, __c); } -__DEVICE__ void sincosf(float __a, float *__s, float *__c) { +__DEVICE_VOID__ void sincosf(float __a, float *__s, float *__c) { return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __s, __c); } -__DEVICE__ void sincospi(double __a, double *__s, double *__c) { +__DEVICE_VOID__ void sincospi(double __a, double *__s, double *__c) { return __nv_sincospi(__a, __s, __c); } -__DEVICE__ void sincospif(float __a, float *__s, float *__c) { +__DEVICE_VOID__ void sincospif(float __a, float *__s, float *__c) { return __nv_sincospif(__a, __s, __c); } __DEVICE__ float sinf(float __a) { @@ -339,7 +345,7 @@ __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); } #pragma pop_macro("__DEVICE__") +#pragma pop_macro("__DEVICE_VOID__") #pragma pop_macro("__FAST_OR_SLOW") -#undef __NOEXCEPT #endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__ diff --git a/clang/lib/Headers/__clang_cuda_math_forward_declares.h b/clang/lib/Headers/__clang_cuda_math_forward_declares.h --- a/clang/lib/Headers/__clang_cuda_math_forward_declares.h +++ b/clang/lib/Headers/__clang_cuda_math_forward_declares.h @@ -20,37 +20,14 @@ // would preclude the use of our own __device__ overloads for these functions. #pragma push_macro("__DEVICE__") -#ifdef _OPENMP -#define __DEVICE__ static __inline__ __attribute__((always_inline)) -#else #define __DEVICE__ \ static __inline__ __attribute__((always_inline)) __attribute__((device)) -#endif - -// For C++ 17 we need to include noexcept attribute to be compatible -// with the header-defined version. This may be removed once -// variant is supported. -#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L -#define __NOEXCEPT noexcept -#else -#define __NOEXCEPT -#endif -#if !(defined(_OPENMP) && defined(__cplusplus)) __DEVICE__ long abs(long); __DEVICE__ long long abs(long long); __DEVICE__ double abs(double); __DEVICE__ float abs(float); -#endif -// While providing the CUDA declarations and definitions for math functions, -// we may manually define additional functions. -// TODO: Once variant is supported the additional functions will have -// to be removed. -#if defined(_OPENMP) && defined(__cplusplus) -__DEVICE__ const double abs(const double); -__DEVICE__ const float abs(const float); -#endif -__DEVICE__ int abs(int) __NOEXCEPT; +__DEVICE__ int abs(int); __DEVICE__ double acos(double); __DEVICE__ float acos(float); __DEVICE__ double acosh(double); @@ -85,8 +62,8 @@ __DEVICE__ float exp(float); __DEVICE__ double expm1(double); __DEVICE__ float expm1(float); -__DEVICE__ double fabs(double) __NOEXCEPT; -__DEVICE__ float fabs(float) __NOEXCEPT; +__DEVICE__ double fabs(double); +__DEVICE__ float fabs(float); __DEVICE__ double fdim(double, double); __DEVICE__ float fdim(float, float); __DEVICE__ double floor(double); @@ -136,12 +113,12 @@ __DEVICE__ bool isnormal(float); __DEVICE__ bool isunordered(double, double); __DEVICE__ bool isunordered(float, float); -__DEVICE__ long labs(long) __NOEXCEPT; +__DEVICE__ long labs(long); __DEVICE__ double ldexp(double, int); __DEVICE__ float ldexp(float, int); __DEVICE__ double lgamma(double); __DEVICE__ float lgamma(float); -__DEVICE__ long long llabs(long long) __NOEXCEPT; +__DEVICE__ long long llabs(long long); __DEVICE__ long long llrint(double); __DEVICE__ long long llrint(float); __DEVICE__ double log10(double); @@ -152,9 +129,6 @@ __DEVICE__ float log2(float); __DEVICE__ double logb(double); __DEVICE__ float logb(float); -#if defined(_OPENMP) && defined(__cplusplus) -__DEVICE__ long double log(long double); -#endif __DEVICE__ double log(double); __DEVICE__ float log(float); __DEVICE__ long lrint(double); @@ -302,7 +276,6 @@ } // namespace std #endif -#undef __NOEXCEPT #pragma pop_macro("__DEVICE__") #endif diff --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h rename from clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h rename to clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h --- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h +++ b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h @@ -1,4 +1,4 @@ -/*===---- __clang_openmp_math_declares.h - OpenMP math declares ------------=== +/*===- __clang_openmp_device_functions.h - OpenMP device function declares -=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. @@ -7,28 +7,32 @@ *===-----------------------------------------------------------------------=== */ -#ifndef __CLANG_OPENMP_MATH_DECLARES_H__ -#define __CLANG_OPENMP_MATH_DECLARES_H__ +#ifndef __CLANG_OPENMP_DEVICE_FUNCTIONS_H__ +#define __CLANG_OPENMP_DEVICE_FUNCTIONS_H__ #ifndef _OPENMP #error "This file is for OpenMP compilation only." #endif -#if defined(__NVPTX__) && defined(_OPENMP) +#pragma omp begin declare variant match( \ + device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) -#define __CUDA__ - -#if defined(__cplusplus) - #include <__clang_cuda_math_forward_declares.h> +#ifdef __cplusplus +extern "C" { #endif +#define __CUDA__ /// Include declarations for libdevice functions. #include <__clang_cuda_libdevice_declares.h> + /// Provide definitions for these functions. #include <__clang_cuda_device_functions.h> -#include <__clang_cuda_math.h> - #undef __CUDA__ +#ifdef __cplusplus +} // extern "C" #endif + +#pragma omp end declare variant + #endif diff --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h deleted file mode 100644 --- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h +++ /dev/null @@ -1,35 +0,0 @@ -/*===---- __clang_openmp_math.h - OpenMP target math support ---------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===-----------------------------------------------------------------------=== - */ - -#if defined(__NVPTX__) && defined(_OPENMP) -/// TODO: -/// We are currently reusing the functionality of the Clang-CUDA code path -/// as an alternative to the host declarations provided by math.h and cmath. -/// This is suboptimal. -/// -/// We should instead declare the device functions in a similar way, e.g., -/// through OpenMP 5.0 variants, and afterwards populate the module with the -/// host declarations by unconditionally including the host math.h or cmath, -/// respectively. This is actually what the Clang-CUDA code path does, using -/// __device__ instead of variants to avoid redeclarations and get the desired -/// overload resolution. - -#define __CUDA__ - -#if defined(__cplusplus) - #include <__clang_cuda_cmath.h> -#endif - -#undef __CUDA__ - -/// Magic macro for stopping the math.h/cmath host header from being included. -#define __CLANG_NO_HOST_MATH__ - -#endif - diff --git a/clang/lib/Headers/openmp_wrappers/cmath b/clang/lib/Headers/openmp_wrappers/cmath --- a/clang/lib/Headers/openmp_wrappers/cmath +++ b/clang/lib/Headers/openmp_wrappers/cmath @@ -1,4 +1,4 @@ -/*===-------------- cmath - Alternative cmath header -----------------------=== +/*===-- __clang_openmp_device_functions.h - OpenMP math declares ------ c++ -=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. @@ -7,10 +7,63 @@ *===-----------------------------------------------------------------------=== */ -#include <__clang_openmp_math.h> +#ifndef __CLANG_OPENMP_CMATH_H__ +#define __CLANG_OPENMP_CMATH_H__ + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif -#ifndef __CLANG_NO_HOST_MATH__ #include_next -#else -#undef __CLANG_NO_HOST_MATH__ + +// Make sure we include our math.h overlay, it probably happend already but we +// need to be sure. +#include + +#pragma omp begin declare variant match( \ + device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) + +#define __CUDA__ +#include <__clang_cuda_cmath.h> +#undef __CUDA__ + +// Overloads not provided by the CUDA wrappers but by the CUDA system headers. +// Since we do not include the latter we define them ourselves. +#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) + +__DEVICE__ float acosh(float __x) { return ::acoshf(__x); } +__DEVICE__ float asinh(float __x) { return ::asinhf(__x); } +__DEVICE__ float atanh(float __x) { return ::atanhf(__x); } +__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); } +__DEVICE__ float erf(float __x) { return ::erff(__x); } +__DEVICE__ float erfc(float __x) { return ::erfcf(__x); } +__DEVICE__ float exp2(float __x) { return ::exp2f(__x); } +__DEVICE__ float expm1(float __x) { return ::expm1f(__x); } +__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); } +__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); } +__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); } +__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); } +__DEVICE__ long long int llrint(float __x) { return ::llrintf(__x); } +__DEVICE__ long long int llround(float __x) { return ::llroundf(__x); } +__DEVICE__ float log1p(float __x) { return ::log1pf(__x); } +__DEVICE__ float log2(float __x) { return ::log2f(__x); } +__DEVICE__ float logb(float __x) { return ::logbf(__x); } +__DEVICE__ long int lrint(float __x) { return ::lrintf(__x); } +__DEVICE__ long int lround(float __x) { return ::lroundf(__x); } +__DEVICE__ float nextafter(float __x, float __y) { + return ::nextafterf(__x, __y); +} +__DEVICE__ float remainder(float __x, float __y) { + return ::remainderf(__x, __y); +} +__DEVICE__ float scalbln(float __x, long int __y) { + return ::scalblnf(__x, __y); +} +__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); } +__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); } + +#undef __DEVICE__ + +#pragma omp end declare variant + #endif diff --git a/clang/lib/Headers/openmp_wrappers/math.h b/clang/lib/Headers/openmp_wrappers/math.h --- a/clang/lib/Headers/openmp_wrappers/math.h +++ b/clang/lib/Headers/openmp_wrappers/math.h @@ -1,4 +1,4 @@ -/*===------------- math.h - Alternative math.h header ----------------------=== +/*===---- openmp_wrapper/math.h -------- OpenMP math.h intercept ------ c++ -=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. @@ -7,11 +7,26 @@ *===-----------------------------------------------------------------------=== */ -#include <__clang_openmp_math.h> +#ifndef __CLANG_OPENMP_MATH_H__ +#define __CLANG_OPENMP_MATH_H__ -#ifndef __CLANG_NO_HOST_MATH__ -#include_next -#else -#undef __CLANG_NO_HOST_MATH__ +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." #endif +#include_next + +// We need limits.h for __clang_cuda_math.h below and because it should not hurt +// we include it eagerly here. +#include + +#pragma omp begin declare variant match( \ + device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) + +#define __CUDA__ +#include <__clang_cuda_math.h> +#undef __CUDA__ + +#pragma omp end declare variant + +#endif diff --git a/clang/lib/Headers/openmp_wrappers/time.h b/clang/lib/Headers/openmp_wrappers/time.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/openmp_wrappers/time.h @@ -0,0 +1,32 @@ +/*===---- time.h - OpenMP time header wrapper ------------------------ c ---=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_OPENMP_TIME_H__ +#define __CLANG_OPENMP_TIME_H__ + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif + +#if defined(__cplusplus) +#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) +#else +#define __DEVICE__ static __attribute__((always_inline, nothrow)) +#endif + +#include_next + +#pragma omp begin declare variant match( \ + device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) + +__DEVICE__ clock_t clock() { return __nvvm_read_ptx_sreg_clock(); } + +#pragma omp end declare variant + +#endif diff --git a/clang/test/Headers/Inputs/include/climits b/clang/test/Headers/Inputs/include/climits new file mode 100644 --- /dev/null +++ b/clang/test/Headers/Inputs/include/climits @@ -0,0 +1,4 @@ +#pragma once + +#define INT_MIN -2147483648 +#define INT_MAX 2147483647 diff --git a/clang/test/Headers/Inputs/include/cmath b/clang/test/Headers/Inputs/include/cmath --- a/clang/test/Headers/Inputs/include/cmath +++ b/clang/test/Headers/Inputs/include/cmath @@ -1,5 +1,227 @@ #pragma once -double sqrt(double); +// __clang_cuda_(c)math(.h) also provide `abs` which actually belong in +// cstdlib. We could split them out but for now we just include cstdlib from +// cmath.h which is what the systems I've seen do as well. +#include + +#include + +double acos(double); +float acos(float); +double acosh(double); +float acosh(float); +double asin(double); +float asin(float); +double asinh(double); +float asinh(float); +double atan2(double, double); +float atan2(float, float); +double atan(double); +float atan(float); +double atanh(double); +float atanh(float); +double cbrt(double); +float cbrt(float); +double ceil(double); +float ceil(float); +double copysign(double, double); +float copysign(float, float); +double cos(double); +float cos(float); +double cosh(double); +float cosh(float); +double erfc(double); +float erfc(float); +double erf(double); +float erf(float); +double exp2(double); +float exp2(float); +double exp(double); +float exp(float); +double expm1(double); +float expm1(float); +double fdim(double, double); +float fdim(float, float); +double floor(double); +float floor(float); +double fma(double, double, double); +float fma(float, float, float); +double fmax(double, double); +float fmax(float, float); +double fmin(double, double); +float fmin(float, float); +double fmod(double, double); +float fmod(float, float); +int fpclassify(double); +int fpclassify(float); +double frexp(double, int *); +float frexp(float, int *); +double hypot(double, double); +float hypot(float, float); +int ilogb(double); +int ilogb(float); +bool isfinite(long double); +bool isfinite(double); +bool isfinite(float); +bool isgreater(double, double); +bool isgreaterequal(double, double); +bool isgreaterequal(float, float); +bool isgreater(float, float); +bool isinf(long double); +bool isinf(double); +bool isinf(float); +bool isless(double, double); +bool islessequal(double, double); +bool islessequal(float, float); +bool isless(float, float); +bool islessgreater(double, double); +bool islessgreater(float, float); +bool isnan(long double); +bool isnan(double); +bool isnan(float); +bool isnormal(double); +bool isnormal(float); +bool isunordered(double, double); +bool isunordered(float, float); +double ldexp(double, int); +float ldexp(float, int); +double lgamma(double); +float lgamma(float); +long long llrint(double); +long long llrint(float); +double log10(double); +float log10(float); +double log1p(double); +float log1p(float); +double log2(double); +float log2(float); +double logb(double); +float logb(float); +double log(double); +float log(float); +long lrint(double); +long lrint(float); +long lround(double); +long lround(float); +long long llround(float); // No llround(double). +double modf(double, double *); +float modf(float, float *); +double nan(const char *); +float nanf(const char *); +double nearbyint(double); +float nearbyint(float); +double nextafter(double, double); +float nextafter(float, float); double pow(double, double); -double modf(double, double*); +double pow(double, int); +float pow(float, float); +float pow(float, int); +double remainder(double, double); +float remainder(float, float); +double remquo(double, double, int *); +float remquo(float, float, int *); +double rint(double); +float rint(float); +double round(double); +float round(float); +double scalbln(double, long); +float scalbln(float, long); +double scalbn(double, int); +float scalbn(float, int); +bool signbit(double); +bool signbit(float); +long double sin(long double); +double sin(double); +float sin(float); +double sinh(double); +float sinh(float); +double sqrt(double); +float sqrt(float); +double tan(double); +float tan(float); +double tanh(double); +float tanh(float); +double tgamma(double); +float tgamma(float); +double trunc(double); +float trunc(float); + +namespace std { + +using ::acos; +using ::acosh; +using ::asin; +using ::asinh; +using ::atan; +using ::atan2; +using ::atanh; +using ::cbrt; +using ::ceil; +using ::copysign; +using ::cos; +using ::cosh; +using ::erf; +using ::erfc; +using ::exp; +using ::exp2; +using ::expm1; +using ::fdim; +using ::floor; +using ::fma; +using ::fmax; +using ::fmin; +using ::fmod; +using ::fpclassify; +using ::frexp; +using ::hypot; +using ::ilogb; +using ::isfinite; +using ::isgreater; +using ::isgreaterequal; +using ::isinf; +using ::isless; +using ::islessequal; +using ::islessgreater; +using ::isnan; +using ::isnormal; +using ::isunordered; +using ::ldexp; +using ::lgamma; +using ::llrint; +using ::log; +using ::log10; +using ::log1p; +using ::log2; +using ::logb; +using ::lrint; +using ::lround; +using ::llround; +using ::modf; +using ::nan; +using ::nanf; +using ::nearbyint; +using ::nextafter; +using ::pow; +using ::remainder; +using ::remquo; +using ::rint; +using ::round; +using ::scalbln; +using ::scalbn; +using ::signbit; +using ::sin; +using ::sinh; +using ::sqrt; +using ::tan; +using ::tanh; +using ::tgamma; +using ::trunc; + +} // namespace std + +#define FP_NAN 0 +#define FP_INFINITE 1 +#define FP_ZERO 2 +#define FP_SUBNORMAL 3 +#define FP_NORMAL 4 diff --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib --- a/clang/test/Headers/Inputs/include/cstdlib +++ b/clang/test/Headers/Inputs/include/cstdlib @@ -1,5 +1,7 @@ #pragma once +#include + #if __cplusplus >= 201703L extern int abs (int __x) throw() __attribute__ ((__const__)) ; extern long int labs (long int __x) throw() __attribute__ ((__const__)) ; @@ -20,4 +22,6 @@ inline long long abs(long long __x) { return __builtin_llabs (__x); } + +float fabs(float __x) { return __builtin_fabs(__x); } } diff --git a/clang/test/Headers/Inputs/include/math.h b/clang/test/Headers/Inputs/include/math.h --- a/clang/test/Headers/Inputs/include/math.h +++ b/clang/test/Headers/Inputs/include/math.h @@ -1,5 +1,199 @@ #pragma once -double sqrt(double); -double pow(double, double); -double modf(double, double*); +// __clang_cuda_(c)math(.h) also provide `abs` which actually belong in +// cstdlib. We could split them out but for now we just include cstdlib from +// cmath.h which is what the systems I've seen do as well. +#include + +double fabs(double __a); +double acos(double __a); +float acosf(float __a); +double acosh(double __a); +float acoshf(float __a); +double asin(double __a); +float asinf(float __a); +double asinh(double __a); +float asinhf(float __a); +double atan(double __a); +double atan2(double __a, double __b); +float atan2f(float __a, float __b); +float atanf(float __a); +double atanh(double __a); +float atanhf(float __a); +double cbrt(double __a); +float cbrtf(float __a); +double ceil(double __a); +float ceilf(float __a); +double copysign(double __a, double __b); +float copysignf(float __a, float __b); +double cos(double __a); +float cosf(float __a); +double cosh(double __a); +float coshf(float __a); +double cospi(double __a); +float cospif(float __a); +double cyl_bessel_i0(double __a); +float cyl_bessel_i0f(float __a); +double cyl_bessel_i1(double __a); +float cyl_bessel_i1f(float __a); +double erf(double __a); +double erfc(double __a); +float erfcf(float __a); +double erfcinv(double __a); +float erfcinvf(float __a); +double erfcx(double __a); +float erfcxf(float __a); +float erff(float __a); +double erfinv(double __a); +float erfinvf(float __a); +double exp(double __a); +double exp10(double __a); +float exp10f(float __a); +double exp2(double __a); +float exp2f(float __a); +float expf(float __a); +double expm1(double __a); +float expm1f(float __a); +float fabsf(float __a); +double fdim(double __a, double __b); +float fdimf(float __a, float __b); +double fdivide(double __a, double __b); +float fdividef(float __a, float __b); +double floor(double __f); +float floorf(float __f); +double fma(double __a, double __b, double __c); +float fmaf(float __a, float __b, float __c); +double fmax(double __a, double __b); +float fmaxf(float __a, float __b); +double fmin(double __a, double __b); +float fminf(float __a, float __b); +double fmod(double __a, double __b); +float fmodf(float __a, float __b); +double frexp(double __a, int *__b); +float frexpf(float __a, int *__b); +double hypot(double __a, double __b); +float hypotf(float __a, float __b); +int ilogb(double __a); +int ilogbf(float __a); +double j0(double __a); +float j0f(float __a); +double j1(double __a); +float j1f(float __a); +double jn(int __n, double __a); +float jnf(int __n, float __a); +double ldexp(double __a, int __b); +float ldexpf(float __a, int __b); +double lgamma(double __a); +float lgammaf(float __a); +long long llmax(long long __a, long long __b); +long long llmin(long long __a, long long __b); +long long llrint(double __a); +long long llrintf(float __a); +long long llround(double __a); +long long llroundf(float __a); +double log(double __a); +double log10(double __a); +float log10f(float __a); +double log1p(double __a); +float log1pf(float __a); +double log2(double __a); +float log2f(float __a); +double logb(double __a); +float logbf(float __a); +float logf(float __a); +long lrint(double __a); +long lrintf(float __a); +long lround(double __a); +long lroundf(float __a); +int max(int __a, int __b); +int min(int __a, int __b); +double modf(double __a, double *__b); +float modff(float __a, float *__b); +double nearbyint(double __a); +float nearbyintf(float __a); +double nextafter(double __a, double __b); +float nextafterf(float __a, float __b); +double norm(int __dim, const double *__t); +double norm3d(double __a, double __b, double __c); +float norm3df(float __a, float __b, float __c); +double norm4d(double __a, double __b, double __c, double __d); +float norm4df(float __a, float __b, float __c, float __d); +double normcdf(double __a); +float normcdff(float __a); +double normcdfinv(double __a); +float normcdfinvf(float __a); +float normf(int __dim, const float *__t); +double pow(double __a, double __b); +float powf(float __a, float __b); +double powi(double __a, int __b); +float powif(float __a, int __b); +double rcbrt(double __a); +float rcbrtf(float __a); +double remainder(double __a, double __b); +float remainderf(float __a, float __b); +double remquo(double __a, double __b, int *__c); +float remquof(float __a, float __b, int *__c); +double rhypot(double __a, double __b); +float rhypotf(float __a, float __b); +double rint(double __a); +float rintf(float __a); +double rnorm(int __a, const double *__b); +double rnorm3d(double __a, double __b, double __c); +float rnorm3df(float __a, float __b, float __c); +double rnorm4d(double __a, double __b, double __c, double __d); +float rnorm4df(float __a, float __b, float __c, float __d); +float rnormf(int __dim, const float *__t); +double round(double __a); +float roundf(float __a); +double rsqrt(double __a); +float rsqrtf(float __a); +double scalbn(double __a, int __b); +float scalbnf(float __a, int __b); +double scalbln(double __a, long __b); +float scalblnf(float __a, long __b); +double sin(double __a); +void sincos(double __a, double *__s, double *__c); +void sincosf(float __a, float *__s, float *__c); +void sincospi(double __a, double *__s, double *__c); +void sincospif(float __a, float *__s, float *__c); +float sinf(float __a); +double sinh(double __a); +float sinhf(float __a); +double sinpi(double __a); +float sinpif(float __a); +double sqrt(double __a); +float sqrtf(float __a); +double tan(double __a); +float tanf(float __a); +double tanh(double __a); +float tanhf(float __a); +double tgamma(double __a); +float tgammaf(float __a); +double trunc(double __a); +float truncf(float __a); +unsigned long long ullmax(unsigned long long __a, + unsigned long long __b); +unsigned long long ullmin(unsigned long long __a, + unsigned long long __b); +unsigned int umax(unsigned int __a, unsigned int __b); +unsigned int umin(unsigned int __a, unsigned int __b); +double y0(double __a); +float y0f(float __a); +double y1(double __a); +float y1f(float __a); +double yn(int __a, double __b); +float ynf(int __a, float __b); + +/** + * A positive float constant expression. HUGE_VALF evaluates + * to +infinity. Used as an error value returned by the built-in + * math functions. + */ +#define HUGE_VALF (__builtin_huge_valf()) + +/** + * A positive double constant expression. HUGE_VAL evaluates + * to +infinity. Used as an error value returned by the built-in + * math functions. + */ +#define HUGE_VAL (__builtin_huge_val()) diff --git a/clang/test/Headers/Inputs/include/stdlib.h b/clang/test/Headers/Inputs/include/stdlib.h --- a/clang/test/Headers/Inputs/include/stdlib.h +++ b/clang/test/Headers/Inputs/include/stdlib.h @@ -1,2 +1,6 @@ #pragma once typedef __SIZE_TYPE__ size_t; + +#ifndef __cplusplus +extern int abs(int __x) __attribute__((__const__)); +#endif diff --git a/clang/test/Headers/nvptx_device_cmath_functions.c b/clang/test/Headers/nvptx_device_cmath_functions.c --- a/clang/test/Headers/nvptx_device_cmath_functions.c +++ b/clang/test/Headers/nvptx_device_cmath_functions.c @@ -3,10 +3,11 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include cmath -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include cmath -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s -#include +#include +#include void test_sqrt(double a1) { #pragma omp target diff --git a/clang/test/Headers/nvptx_device_cmath_functions.cpp b/clang/test/Headers/nvptx_device_cmath_functions.cpp --- a/clang/test/Headers/nvptx_device_cmath_functions.cpp +++ b/clang/test/Headers/nvptx_device_cmath_functions.cpp @@ -3,8 +3,8 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include cmath -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include cmath -internal-isystem %S/Inputs/include -include stdlib.h -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s #include #include diff --git a/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp b/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp --- a/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp +++ b/clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp @@ -3,8 +3,8 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include cmath -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include cmath -internal-isystem %S/Inputs/include -include stdlib.h -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s #include #include diff --git a/clang/test/Headers/nvptx_device_math_complex.c b/clang/test/Headers/nvptx_device_math_complex.c new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_complex.c @@ -0,0 +1,23 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +// CHECK-DAG: call { float, float } @__divsc3( +// CHECK-DAG: call { float, float } @__mulsc3( +void test_scmplx(float _Complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +} + + +// CHECK-DAG: call { double, double } @__divdc3( +// CHECK-DAG: call { double, double } @__muldc3( +void test_dcmplx(double _Complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +} diff --git a/clang/test/Headers/nvptx_device_math_functions.c b/clang/test/Headers/nvptx_device_math_functions.c --- a/clang/test/Headers/nvptx_device_math_functions.c +++ b/clang/test/Headers/nvptx_device_math_functions.c @@ -3,23 +3,31 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include math.h -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include math.h -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +#ifdef __cplusplus +#include +#include +#else +#include #include +#endif void test_sqrt(double a1) { #pragma omp target { - // CHECK-YES: call double @__nv_sqrt(double + // CHECK: call double @__nv_sqrt(double double l1 = sqrt(a1); - // CHECK-YES: call double @__nv_pow(double + // CHECK: call double @__nv_pow(double double l2 = pow(a1, a1); - // CHECK-YES: call double @__nv_modf(double + // CHECK: call double @__nv_modf(double double l3 = modf(a1 + 3.5, &a1); - // CHECK-YES: call double @__nv_fabs(double + // CHECK: call double @__nv_fabs(double double l4 = fabs(a1); - // CHECK-YES: call i32 @__nv_abs(i32 + // CHECK: call i32 @__nv_abs(i32 double l5 = abs((int)a1); } } diff --git a/clang/test/Headers/nvptx_device_math_functions.cpp b/clang/test/Headers/nvptx_device_math_functions.cpp --- a/clang/test/Headers/nvptx_device_math_functions.cpp +++ b/clang/test/Headers/nvptx_device_math_functions.cpp @@ -3,11 +3,11 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include math.h -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include math.h -internal-isystem %S/Inputs/include -include stdlib.h -include limits -include cstdlib -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s #include -#include +#include void test_sqrt(double a1) { #pragma omp target diff --git a/clang/test/Headers/nvptx_device_math_functions_cxx17.cpp b/clang/test/Headers/nvptx_device_math_functions_cxx17.cpp --- a/clang/test/Headers/nvptx_device_math_functions_cxx17.cpp +++ b/clang/test/Headers/nvptx_device_math_functions_cxx17.cpp @@ -3,11 +3,11 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include math.h -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include math.h -internal-isystem %S/Inputs/include -include stdlib.h -include limits -include cstdlib -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s #include -#include +#include void test_sqrt(double a1) { #pragma omp target diff --git a/clang/test/Headers/nvptx_device_math_macro.cpp b/clang/test/Headers/nvptx_device_math_macro.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_macro.cpp @@ -0,0 +1,17 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +#include + +#pragma omp declare target +int use_macro() { + double a(0); +// CHECK-NOT: call +// CHECK: call double @llvm.fabs.f64(double +// CHECK-NOT: call +// CHECK: ret i32 %conv + return (std::fpclassify(a) != FP_ZERO); +} +#pragma omp end declare target diff --git a/clang/test/Headers/nvptx_device_math_modf.cpp b/clang/test/Headers/nvptx_device_math_modf.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_modf.cpp @@ -0,0 +1,53 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s + +#include + +// 4 calls to modf(f), all translated to __nv_modf calls: + +// CHECK-NOT: _Z.modf +// CHECK: call double @__nv_modf(double +// CHECK-NOT: _Z.modf +// CHECK: call float @__nv_modff(float +// CHECK-NOT: _Z.modf +// CHECK: call double @__nv_modf(double +// CHECK-NOT: _Z.modf +// CHECK: call float @__nv_modff(float +// CHECK-NOT: _Z.modf + +template +void test_modf(T x) +{ + T dx; + int intx; + + #pragma omp target map(from: intx, dx) + { + T ipart; + dx = std::modf(x, &ipart); + intx = static_cast(ipart); + } +} + +int main() +{ + +#if !defined(C_ONLY) + test_modf(1.0); + test_modf(1.0); +#endif + + #pragma omp target + { + double intpart, res; + res = modf(1.1, &intpart); + } + + #pragma omp target + { + float intpart, res; + res = modff(1.1f, &intpart); + } + +} diff --git a/clang/test/Headers/nvptx_device_math_sin.c b/clang/test/Headers/nvptx_device_math_sin.c new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_sin.c @@ -0,0 +1,27 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=SLOW +// RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math +// RUN: %clang_cc1 -x c -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math | FileCheck %s --check-prefix=FAST +// expected-no-diagnostics + +#include + +double math(float f, double d, long double ld) { + double r = 0; +// SLOW: call float @__nv_sinf(float +// FAST: call fast float @__nv_fast_sinf(float + r += sinf(f); +// SLOW: call double @__nv_sin(double +// FAST: call fast double @__nv_sin(double + r += sin(d); + return r; +} + +long double foo(float f, double d, long double ld) { + double r = ld; + r += math(f, d, ld); +#pragma omp target map(r) + { r += math(f, d, ld); } + return r; +} diff --git a/clang/test/Headers/nvptx_device_math_sin.cpp b/clang/test/Headers/nvptx_device_math_sin.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_sin.cpp @@ -0,0 +1,27 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=SLOW +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math | FileCheck %s --check-prefix=FAST +// expected-no-diagnostics + +#include + +double math(float f, double d, long double ld) { + double r = 0; +// SLOW: call float @__nv_sinf(float +// FAST: call fast float @__nv_fast_sinf(float + r += sin(f); +// SLOW: call double @__nv_sin(double +// FAST: call fast double @__nv_sin(double + r += sin(d); + return r; +} + +long double foo(float f, double d, long double ld) { + double r = ld; + r += math(f, d, ld); +#pragma omp target map(r) + { r += math(f, d, ld); } + return r; +} diff --git a/clang/test/Headers/nvptx_device_math_sin_cos.cpp b/clang/test/Headers/nvptx_device_math_sin_cos.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_sin_cos.cpp @@ -0,0 +1,63 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s + +#include + +// 6 calls to sin/cos(f), all translated to __nv_sin/__nv_cos calls: + +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call double @__nv_sin(double +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call float @__nv_sinf(float +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call double @__nv_sin(double +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call double @__nv_cos(double +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call float @__nv_sinf(float +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call float @__nv_cosf(float +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos + +template +void test_sin_cos(T x) +{ + T res_sin, res_cos; + + #pragma omp target map(from: res_sin, res_cos) + { + res_sin = std::sin(x); + res_cos = std::cos(x); + } +} + +int main() +{ + +#if !defined(C_ONLY) + test_sin_cos(0.0); + test_sin_cos(0.0); +#endif + + #pragma omp target + { + double res; + res = sin(1.0); + } + + #pragma omp target + { + float res; + res = sinf(1.0f); + } + + return 0; +} diff --git a/clang/test/Headers/nvptx_device_math_sincos.cpp b/clang/test/Headers/nvptx_device_math_sincos.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Headers/nvptx_device_math_sincos.cpp @@ -0,0 +1,58 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s + +#include + +// 4 calls to sincos(f), all translated to __nv_sincos calls: + +// CHECK-NOT: _Z.sincos +// CHECK: call void @__nv_sincos(double +// CHECK-NOT: _Z.sincos +// CHECK: call void @__nv_sincosf(float +// CHECK-NOT: _Z.sincos +// CHECK: call void @__nv_sincos(double +// CHECK-NOT: _Z.sincos +// CHECK: call void @__nv_sincosf(float +// CHECK-NOT: _Z.sincos + +// single precision wrapper +inline void sincos(float x, float* __restrict__ sin, float* __restrict__ cos) +{ + sincosf(x, sin, cos); +} + +template +void test_sincos(T x) +{ + T res_sin, res_cos; + + #pragma omp target map(from: res_sin, res_cos) + { + sincos(x, &res_sin, &res_cos); + } + +} + +int main(int argc, char **argv) +{ + +#if !defined(C_ONLY) + test_sincos(0.0); + test_sincos(0.0); +#endif + + #pragma omp target + { + double s, c; + sincos(0, &s, &c); + } + + #pragma omp target + { + float s, c; + sincosf(0.f, &s, &c); + } + + return 0; +}