diff --git a/clang/lib/Headers/__clang_cuda_cmath.h b/clang/lib/Headers/__clang_cuda_cmath.h --- a/clang/lib/Headers/__clang_cuda_cmath.h +++ b/clang/lib/Headers/__clang_cuda_cmath.h @@ -66,10 +66,38 @@ } // For inscrutable reasons, the CUDA headers define these functions for us on -// Windows. For OpenMP we omit these as some old system headers have -// non-conforming `isinf(float)` and `isnan(float)` implementations that return -// an `int`. The system versions of these functions should be fine anyway. -#if !defined(_MSC_VER) && !defined(__OPENMP_NVPTX__) +// Windows. +#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__) + +// For OpenMP we work around some old system headers that have non-conforming +// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do +// this by providing two versions of these functions, differing only in the +// return type. To avoid conflicting definitions we disable implicit base +// function generation. That means we will end up with two specializations, one +// per type, but only one has a base function defined by the system header. +#if defined(__OPENMP_NVPTX__) +#pragma omp begin declare variant match( \ + implementation = {extension(disable_implicit_base)}) + +// FIXME: We lack an extension to customize the mangling of the variants, e.g., +// add a suffix. This means we would clash with the names of the variants +// (note that we do not create implicit base functions here). To avoid +// this clash we add a new trait to some of them that is always true +// (this is LLVM after all ;)). It will only influence the mangled name +// of the variants inside the inner region and avoid the clash. +#pragma omp begin declare variant match(implementation = {vendor(llvm)}) + +__DEVICE__ int isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ int isinf(double __x) { return ::__isinf(__x); } +__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); } +__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); } +__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ int isnan(double __x) { return ::__isnan(__x); } + +#pragma omp end declare variant + +#endif + __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } @@ -79,6 +107,11 @@ __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); } __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } + +#if defined(__OPENMP_NVPTX__) +#pragma omp end declare variant +#endif + #endif __DEVICE__ bool isgreater(float __x, float __y) { diff --git a/clang/test/Headers/Inputs/include/cmath b/clang/test/Headers/Inputs/include/cmath --- a/clang/test/Headers/Inputs/include/cmath +++ b/clang/test/Headers/Inputs/include/cmath @@ -82,8 +82,13 @@ bool islessgreater(double, double); bool islessgreater(float, float); bool isnan(long double); +#ifdef USE_ISNAN_WITH_INT_RETURN +int isnan(double); +int isnan(float); +#else bool isnan(double); bool isnan(float); +#endif bool isnormal(double); bool isnormal(float); bool isunordered(double, double); diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Headers/openmp_device_math_isnan.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=BOOL_RETURN +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=BOOL_RETURN +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN +// expected-no-diagnostics + +#include + +double math(float f, double d) { + double r = 0; + // INT_RETURN: call i32 @__nv_isnanf(float + // BOOL_RETURN: call i32 @__nv_isnanf(float + r += std::isnan(f); + // INT_RETURN: call i32 @__nv_isnand(double + // BOOL_RETURN: call i32 @__nv_isnand(double + r += std::isnan(d); + return r; +} + +long double foo(float f, double d, long double ld) { + double r = ld; + r += math(f, d); +#pragma omp target map(r) + { r += math(f, d); } + return r; +}