Index: lib/Headers/cuda_wrappers/algorithm =================================================================== --- lib/Headers/cuda_wrappers/algorithm +++ lib/Headers/cuda_wrappers/algorithm @@ -24,28 +24,36 @@ #ifndef __CLANG_CUDA_WRAPPERS_ALGORITHM #define __CLANG_CUDA_WRAPPERS_ALGORITHM -// This header defines __device__ overloads of std::min/max, but only if we're -// <= C++11. In C++14, these functions are constexpr, and so are implicitly -// __host__ __device__. +// This header defines __device__ overloads of std::min/max. // -// We don't support the initializer_list overloads because -// initializer_list::begin() and end() are not __host__ __device__ functions. +// Ideally we'd declare these functions only if we're <= C++11. In C++14, +// these functions are constexpr, and so are implicitly __host__ __device__. // -// When compiling in C++14 mode, we could force std::min/max to have different -// implementations for host and device, by declaring the device overloads -// before the constexpr overloads appear. We choose not to do this because - -// a) why write our own implementation when we can use one from the standard -// library? and -// b) libstdc++ is evil and declares min/max inside a header that is included -// *before* we include . So we'd have to unconditionally -// declare our __device__ overloads of min/max, but that would pollute -// things for people who choose not to include . +// However, the compiler being in C++14 mode does not imply that the standard +// library supports C++14. There is no macro we can test to check that the +// stdlib has constexpr std::min/max. Thus we have to unconditionally define +// our device overloads. +// +// A host+device function cannot be overloaded, and a constexpr function +// implicitly become host device if there's no explicitly host or device +// overload preceding it. So the simple thing to do would be to declare our +// device min/max overloads, and then #include_next . This way our +// device overloads would come first, and so if we have a C++14 stdlib, its +// min/max won't become host+device and conflict with our device overloads. +// +// But that also doesn't work. libstdc++ is evil and declares std::min/max in +// an internal header that is included *before* . Thus by the time +// we're inside of this file, std::min/max may already have been declared, and +// thus we can't prevent them from becoming host+device if they're constexpr. +// +// Therefore we perpetrate the following hack: We mark our __device__ overloads +// with __attribute__((enable_if(true, ""))). This causes the signature of the +// function to change without changing anything else about it. (Except that +// overload resolution will prefer it over the __host__ __device__ version +// rather than considering them equally good). #include_next -#if __cplusplus <= 201103L - // We need to define these overloads in exactly the namespace our standard // library uses (including the right inline namespace), otherwise they won't be // picked up by other functions in the standard library (e.g. functions in @@ -60,24 +68,28 @@ #endif template +__attribute__((enable_if(true, ""))) inline __device__ const __T & max(const __T &__a, const __T &__b, __Cmp __cmp) { return __cmp(__a, __b) ? __b : __a; } template +__attribute__((enable_if(true, ""))) inline __device__ const __T & max(const __T &__a, const __T &__b) { return __a < __b ? __b : __a; } template +__attribute__((enable_if(true, ""))) inline __device__ const __T & min(const __T &__a, const __T &__b, __Cmp __cmp) { return __cmp(__b, __a) ? __b : __a; } template +__attribute__((enable_if(true, ""))) inline __device__ const __T & min(const __T &__a, const __T &__b) { return __a < __b ? __a : __b; @@ -92,5 +104,4 @@ } // namespace std #endif -#endif // __cplusplus <= 201103L #endif // __CLANG_CUDA_WRAPPERS_ALGORITHM