Index: cfe/trunk/lib/Headers/__clang_cuda_cmath.h
===================================================================
--- cfe/trunk/lib/Headers/__clang_cuda_cmath.h
+++ cfe/trunk/lib/Headers/__clang_cuda_cmath.h
@@ -26,13 +26,15 @@
 #error "This file is for CUDA compilation only."
 #endif
 
+#include <limits>
+
 // CUDA lets us use various std math functions on the device side. This file
 // works in concert with __clang_cuda_math_forward_declares.h to make this work.
 //
 // Specifically, the forward-declares header declares __device__ overloads for
 // these functions in the global namespace, then pulls them into namespace std
 // with 'using' statements. Then this file implements those functions, after
-// the implementations have been pulled in.
+// their implementations have been pulled in.
 //
 // It's important that we declare the functions in the global namespace and pull
 // them into namespace std with using statements, as opposed to simply declaring
@@ -120,12 +122,15 @@
 __DEVICE__ float log(float __x) { return ::logf(__x); }
 __DEVICE__ float log10(float __x) { return ::log10f(__x); }
 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
-__DEVICE__ float nexttoward(float __from, float __to) {
+__DEVICE__ float nexttoward(float __from, double __to) {
   return __builtin_nexttowardf(__from, __to);
 }
 __DEVICE__ double nexttoward(double __from, double __to) {
   return __builtin_nexttoward(__from, __to);
 }
+__DEVICE__ float nexttowardf(float __from, double __to) {
+  return __builtin_nexttowardf(__from, __to);
+}
 __DEVICE__ float pow(float __base, float __exp) {
   return ::powf(__base, __exp);
 }
@@ -143,6 +148,280 @@
 __DEVICE__ float tan(float __x) { return ::tanf(__x); }
 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
 
+// Now we've defined everything we promised we'd define in
+// __clang_cuda_math_forward_declares.h.  We need to do two additional things to
+// fix up our math functions.
+//
+// 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
+//    only sin(float) and sin(double), which means that e.g. sin(0) is
+//    ambiguous.
+//
+// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
+//    std.  These are defined in the CUDA headers in the global namespace,
+//    independent of everything else we've done here.
+
+// We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
+// we go ahead and unconditionally define functions that are only available when
+// compiling for C++11 to match the behavior of the CUDA headers.
+template <bool __B, class __T = void>
+struct __clang_cuda_enable_if {};
+
+template <class __T> struct __clang_cuda_enable_if<true, __T> {
+  typedef __T type;
+};
+
+// Defines an overload of __fn that accepts one integral argument, calls
+// __fn((double)x), and returns __retty.
+#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
+  template <typename __T>                                                      \
+  __DEVICE__                                                                   \
+      typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
+                                      __retty>::type                           \
+      __fn(__T __x) {                                                          \
+    return ::__fn((double)__x);                                                \
+  }
+
+// Defines an overload of __fn that accepts two arithmetic arguments, calls
+// __fn((double)x, (double)y), and returns a double.
+//
+// Note this is different from OVERLOAD_1, which generates an overload that
+// accepts only *integral* arguments.
+#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
+  template <typename __T1, typename __T2>                                      \
+  __DEVICE__ typename __clang_cuda_enable_if<                                  \
+      std::numeric_limits<__T1>::is_specialized &&                             \
+          std::numeric_limits<__T2>::is_specialized,                           \
+      __retty>::type                                                           \
+  __fn(__T1 __x, __T2 __y) {                                                   \
+    return __fn((double)__x, (double)__y);                                     \
+  }
+
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
+__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
+
+#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
+#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
+
+// Overloads for functions that don't match the patterns expected by
+// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
+template <typename __T1, typename __T2, typename __T3>
+__DEVICE__ typename __clang_cuda_enable_if<
+    std::numeric_limits<__T1>::is_specialized &&
+        std::numeric_limits<__T2>::is_specialized &&
+        std::numeric_limits<__T3>::is_specialized,
+    double>::type
+fma(__T1 __x, __T2 __y, __T3 __z) {
+  return std::fma((double)__x, (double)__y, (double)__z);
+}
+
+template <typename __T>
+__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
+                                           double>::type
+frexp(__T __x, int *__exp) {
+  return std::frexp((double)__x, __exp);
+}
+
+template <typename __T>
+__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
+                                           double>::type
+ldexp(__T __x, int __exp) {
+  return std::ldexp((double)__x, __exp);
+}
+
+template <typename __T>
+__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
+                                           double>::type
+nexttoward(__T __from, double __to) {
+  return std::nexttoward((double)__from, __to);
+}
+
+template <typename __T1, typename __T2>
+__DEVICE__ typename __clang_cuda_enable_if<
+    std::numeric_limits<__T1>::is_specialized &&
+        std::numeric_limits<__T2>::is_specialized,
+    double>::type
+remquo(__T1 __x, __T2 __y, int *__quo) {
+  return std::remquo((double)__x, (double)__y, __quo);
+}
+
+template <typename __T>
+__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
+                                           double>::type
+scalbln(__T __x, long __exp) {
+  return std::scalbln((double)__x, __exp);
+}
+
+template <typename __T>
+__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
+                                           double>::type
+scalbn(__T __x, int __exp) {
+  return std::scalbn((double)__x, __exp);
+}
+
+namespace std {
+// Pull the new overloads we defined above into namespace std.
+using ::acos;
+using ::acosh;
+using ::asin;
+using ::asinh;
+using ::atan;
+using ::atan2;
+using ::atanh;
+using ::cbrt;
+using ::ceil;
+using ::cos;
+using ::cosh;
+using ::erf;
+using ::erfc;
+using ::exp;
+using ::exp2;
+using ::expm1;
+using ::fabs;
+using ::floor;
+using ::frexp;
+using ::ilogb;
+using ::ldexp;
+using ::lgamma;
+using ::llrint;
+using ::llround;
+using ::log;
+using ::log10;
+using ::log1p;
+using ::log2;
+using ::logb;
+using ::lrint;
+using ::lround;
+using ::nexttoward;
+using ::pow;
+using ::remquo;
+using ::scalbln;
+using ::scalbn;
+using ::sin;
+using ::sinh;
+using ::sqrt;
+using ::tan;
+using ::tanh;
+using ::tgamma;
+
+// Finally, pull the "foobarf" functions that CUDA defines in its headers into
+// namespace std.
+using ::acosf;
+using ::acoshf;
+using ::asinf;
+using ::asinhf;
+using ::atan2f;
+using ::atanf;
+using ::atanhf;
+using ::cbrtf;
+using ::ceilf;
+using ::copysignf;
+using ::cosf;
+using ::coshf;
+using ::erfcf;
+using ::erff;
+using ::exp2f;
+using ::expf;
+using ::expm1f;
+using ::fabsf;
+using ::fdimf;
+using ::floorf;
+using ::fmaf;
+using ::fmaxf;
+using ::fminf;
+using ::fmodf;
+using ::frexpf;
+using ::hypotf;
+using ::ilogbf;
+using ::ldexpf;
+using ::lgammaf;
+using ::llrintf;
+using ::llroundf;
+using ::log10f;
+using ::log1pf;
+using ::log2f;
+using ::logbf;
+using ::logf;
+using ::lrintf;
+using ::lroundf;
+using ::modff;
+using ::nearbyintf;
+using ::nextafterf;
+using ::nexttowardf;
+using ::powf;
+using ::remainderf;
+using ::remquof;
+using ::rintf;
+using ::roundf;
+using ::scalblnf;
+using ::scalbnf;
+using ::sinf;
+using ::sinhf;
+using ::sqrtf;
+using ::tanf;
+using ::tanhf;
+using ::tgammaf;
+using ::truncf;
+}
+
 #undef __DEVICE__
 
 #endif
Index: cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h
===================================================================
--- cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h
+++ cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h
@@ -140,6 +140,7 @@
 __DEVICE__ long lrint(float);
 __DEVICE__ long lround(double);
 __DEVICE__ long lround(float);
+__DEVICE__ long long llround(float); // No llround(double).
 __DEVICE__ double modf(double, double *);
 __DEVICE__ float modf(float, float *);
 __DEVICE__ double nan(const char *);
@@ -149,7 +150,8 @@
 __DEVICE__ double nextafter(double, double);
 __DEVICE__ float nextafter(float, float);
 __DEVICE__ double nexttoward(double, double);
-__DEVICE__ float nexttoward(float, float);
+__DEVICE__ float nexttoward(float, double);
+__DEVICE__ float nexttowardf(float, double);
 __DEVICE__ double pow(double, double);
 __DEVICE__ double pow(double, int);
 __DEVICE__ float pow(float, float);
@@ -235,6 +237,7 @@
 using ::logb;
 using ::lrint;
 using ::lround;
+using ::llround;
 using ::modf;
 using ::nan;
 using ::nanf;