Index: include/clang/Basic/LangOptions.def =================================================================== --- include/clang/Basic/LangOptions.def +++ include/clang/Basic/LangOptions.def @@ -172,6 +172,7 @@ LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device") LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "Allow variadic functions in CUDA device code") +LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "Treat unattributed constexpr functions as __host__ __device__") LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators") LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions") Index: include/clang/Driver/CC1Options.td =================================================================== --- include/clang/Driver/CC1Options.td +++ include/clang/Driver/CC1Options.td @@ -691,6 +691,8 @@ HelpText<"Incorporate CUDA device-side binary into host object file.">; def fcuda_allow_variadic_functions : Flag<["-"], "fcuda-allow-variadic-functions">, HelpText<"Allow variadic functions in CUDA device code.">; +def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr">, + HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">; //===----------------------------------------------------------------------===// // OpenMP Options Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -1560,6 +1560,9 @@ if (Args.hasArg(OPT_fcuda_allow_variadic_functions)) Opts.CUDAAllowVariadicFunctions = 1; + if (Args.hasArg(OPT_fno_cuda_host_device_constexpr)) + Opts.CUDAHostDeviceConstexpr = 0; + if (Opts.ObjC1) { if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) { StringRef value = arg->getValue(); Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -8005,6 +8005,16 @@ // Handle attributes. ProcessDeclAttributes(S, NewFD, D); + // With CUDAHostDeviceConstexpr, unattributed constexpr functions are treated + // as implicitly __host__ __device__. Device-side variadic functions are not + // allowed, so we just treat those as host-only. + if (getLangOpts().CUDA && getLangOpts().CUDAHostDeviceConstexpr && + NewFD->isConstexpr() && !NewFD->isVariadic() && + !NewFD->hasAttr() && !NewFD->hasAttr()) { + NewFD->addAttr(CUDAHostAttr::CreateImplicit(Context)); + NewFD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + } + if (getLangOpts().OpenCL) { // OpenCL v1.1 s6.5: Using an address space qualifier in a function return // type declaration will generate a compilation error. Index: lib/Sema/SemaOverload.cpp =================================================================== --- lib/Sema/SemaOverload.cpp +++ lib/Sema/SemaOverload.cpp @@ -1126,13 +1126,10 @@ assert((OldTarget != CFT_InvalidTarget) && "Unexpected invalid target."); - // Don't allow mixing of HD with other kinds. This guarantees that - // we have only one viable function with this signature on any - // side of CUDA compilation . - // __global__ functions can't be overloaded based on attribute - // difference because, like HD, they also exist on both sides. - if ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) || - (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) + // Don't allow __global__ functions to be overloaded with other functions, + // based solely on their CUDA attributes. This guarantees that we have only + // one viable function with this signature on any side of CUDA compilation. + if ((NewTarget == CFT_Global) || (OldTarget == CFT_Global)) return false; // Allow overloading of functions with same signature, but Index: test/SemaCUDA/function-overload.cu =================================================================== --- test/SemaCUDA/function-overload.cu +++ test/SemaCUDA/function-overload.cu @@ -39,22 +39,18 @@ __host__ HostReturnTy dh() { return HostReturnTy(); } __device__ DeviceReturnTy dh() { return DeviceReturnTy(); } -// H/HD and D/HD are not allowed. -__host__ __device__ int hdh() { return 0; } // expected-note {{previous definition is here}} -__host__ int hdh() { return 0; } // expected-error {{redefinition of 'hdh'}} +// H/HD and D/HD are also OK. +__host__ __device__ HostDeviceReturnTy hdh() { return HostDeviceReturnTy(); } +__host__ HostReturnTy hdh() { return HostReturnTy(); } -__host__ int hhd() { return 0; } // expected-note {{previous definition is here}} -__host__ __device__ int hhd() { return 0; } // expected-error {{redefinition of 'hhd'}} -// expected-warning@-1 {{attribute declaration must precede definition}} -// expected-note@-3 {{previous definition is here}} +__host__ HostReturnTy hhd() { return HostReturnTy(); } +__host__ __device__ HostDeviceReturnTy hhd() { return HostDeviceReturnTy(); } -__host__ __device__ int hdd() { return 0; } // expected-note {{previous definition is here}} -__device__ int hdd() { return 0; } // expected-error {{redefinition of 'hdd'}} +__host__ __device__ HostDeviceReturnTy hdd() { return HostDeviceReturnTy(); } +__device__ DeviceReturnTy hdd() { return DeviceReturnTy(); } -__device__ int dhd() { return 0; } // expected-note {{previous definition is here}} -__host__ __device__ int dhd() { return 0; } // expected-error {{redefinition of 'dhd'}} -// expected-warning@-1 {{attribute declaration must precede definition}} -// expected-note@-3 {{previous definition is here}} +__device__ DeviceReturnTy dhd() { return DeviceReturnTy(); } +__host__ __device__ HostDeviceReturnTy dhd() { return HostDeviceReturnTy(); } // Same tests for extern "C" functions. extern "C" __host__ int chh() { return 0; } // expected-note {{previous definition is here}} @@ -64,14 +60,12 @@ extern "C" __device__ DeviceReturnTy cdh() { return DeviceReturnTy(); } extern "C" __host__ HostReturnTy cdh() { return HostReturnTy(); } -// H/HD and D/HD overloading is not allowed. -extern "C" __host__ __device__ int chhd1() { return 0; } // expected-note {{previous definition is here}} -extern "C" __host__ int chhd1() { return 0; } // expected-error {{redefinition of 'chhd1'}} +// H/HD and D/HD overloading is OK. +extern "C" __host__ __device__ HostDeviceReturnTy chhd() { return HostDeviceReturnTy(); } +extern "C" __host__ HostReturnTy chhd() { return HostReturnTy(); } -extern "C" __host__ int chhd2() { return 0; } // expected-note {{previous definition is here}} -extern "C" __host__ __device__ int chhd2() { return 0; } // expected-error {{redefinition of 'chhd2'}} -// expected-warning@-1 {{attribute declaration must precede definition}} -// expected-note@-3 {{previous definition is here}} +extern "C" __host__ __device__ HostDeviceReturnTy chdd() { return HostDeviceReturnTy(); } +extern "C" __device__ DeviceReturnTy chdd() { return DeviceReturnTy(); } // Helper functions to verify calling restrictions. __device__ DeviceReturnTy d() { return DeviceReturnTy(); } @@ -118,6 +112,16 @@ HostFnPtr fp_cdh = cdh; HostReturnTy ret_cdh = cdh(); + HostFnPtr fp_hdh = hdh; + HostReturnTy ret_hdh = hdh(); + HostFnPtr fp_chhd = chhd; + HostReturnTy ret_chhd = chhd(); + + HostDeviceFnPtr fp_hdd = hdd; + HostDeviceReturnTy ret_hdd = hdd(); + HostDeviceFnPtr fp_chdd = chdd; + HostDeviceReturnTy ret_chdd = chdd(); + GlobalFnPtr fp_g = g; g(); // expected-error {{call to global function g not configured}} g<<<0, 0>>>(); @@ -139,6 +143,16 @@ DeviceFnPtr fp_cdh = cdh; DeviceReturnTy ret_cdh = cdh(); + HostDeviceFnPtr fp_hdh = hdh; + HostDeviceReturnTy ret_hdh = hdh(); + HostDeviceFnPtr fp_chhd = chhd; + HostDeviceReturnTy ret_chhd = chhd(); + + DeviceFnPtr fp_hdd = hdd; + DeviceReturnTy ret_hdd = hdd(); + DeviceFnPtr fp_chdd = chdd; + DeviceReturnTy ret_chdd = chdd(); + GlobalFnPtr fp_g = g; // expected-error {{reference to __global__ function 'g' in __device__ function}} g(); // expected-error {{no matching function for call to 'g'}} g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __device__ function}} @@ -160,6 +174,16 @@ DeviceFnPtr fp_cdh = cdh; DeviceReturnTy ret_cdh = cdh(); + HostDeviceFnPtr fp_hdh = hdh; + HostDeviceReturnTy ret_hdh = hdh(); + HostDeviceFnPtr fp_chhd = chhd; + HostDeviceReturnTy ret_chhd = chhd(); + + DeviceFnPtr fp_hdd = hdd; + DeviceReturnTy ret_hdd = hdd(); + DeviceFnPtr fp_chdd = chdd; + DeviceReturnTy ret_chdd = chdd(); + GlobalFnPtr fp_g = g; // expected-error {{reference to __global__ function 'g' in __global__ function}} g(); // expected-error {{no matching function for call to 'g'}} g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __global__ function}} @@ -181,6 +205,30 @@ CurrentFnPtr fp_cdh = cdh; CurrentReturnTy ret_cdh = cdh(); + // HDOrHostFoo is HostFoo if we're doing host compilation, and HDFoo + // otherwise. +#ifdef __CUDA_ARCH__ + typedef HostDeviceReturnTy HDOrHostReturnTy; + typedef HostDeviceFnPtr HDOrHostFnPtr; + typedef DeviceReturnTy HDOrDeviceReturnTy; + typedef DeviceFnPtr HDOrDeviceFnPtr; +#else + typedef HostReturnTy HDOrHostReturnTy; + typedef HostFnPtr HDOrHostFnPtr; + typedef HostDeviceReturnTy HDOrDeviceReturnTy; + typedef HostDeviceFnPtr HDOrDeviceFnPtr; +#endif + + HDOrHostFnPtr fp_hdh = hdh; + HDOrHostReturnTy ret_hdh = hdh(); + HDOrHostFnPtr fp_chhd = chhd; + HDOrHostReturnTy ret_chhd = chhd(); + + HDOrDeviceFnPtr fp_hdd = hdd; + HDOrDeviceReturnTy ret_hdd = hdd(); + HDOrDeviceFnPtr fp_chdd = chdd; + HDOrDeviceReturnTy ret_chdd = chdd(); + GlobalFnPtr fp_g = g; #if defined(__CUDA_ARCH__) // expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}} @@ -221,31 +269,31 @@ __host__ __device__ ~d_hd() {} }; -// Mixing H/D and HD is not allowed. +// Mixing H/D and HD is OK. struct d_dhhd { __device__ ~d_dhhd() {} - __host__ ~d_dhhd() {} // expected-note {{previous declaration is here}} - __host__ __device__ ~d_dhhd() {} // expected-error {{destructor cannot be redeclared}} + __host__ ~d_dhhd() {} + __host__ __device__ ~d_dhhd() {} }; struct d_hhd { - __host__ ~d_hhd() {} // expected-note {{previous declaration is here}} - __host__ __device__ ~d_hhd() {} // expected-error {{destructor cannot be redeclared}} + __host__ ~d_hhd() {} + __host__ __device__ ~d_hhd() {} }; struct d_hdh { - __host__ __device__ ~d_hdh() {} // expected-note {{previous declaration is here}} - __host__ ~d_hdh() {} // expected-error {{destructor cannot be redeclared}} + __host__ __device__ ~d_hdh() {} + __host__ ~d_hdh() {} }; struct d_dhd { - __device__ ~d_dhd() {} // expected-note {{previous declaration is here}} - __host__ __device__ ~d_dhd() {} // expected-error {{destructor cannot be redeclared}} + __device__ ~d_dhd() {} + __host__ __device__ ~d_dhd() {} }; struct d_hdd { - __host__ __device__ ~d_hdd() {} // expected-note {{previous declaration is here}} - __device__ ~d_hdd() {} // expected-error {{destructor cannot be redeclared}} + __host__ __device__ ~d_hdd() {} + __device__ ~d_hdd() {} }; // Test overloading of member functions @@ -266,23 +314,23 @@ }; struct m_hhd { - __host__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} - __host__ __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} + __host__ void operator delete(void *ptr) {} + __host__ __device__ void operator delete(void *ptr) {} }; struct m_hdh { - __host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} - __host__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} + __host__ __device__ void operator delete(void *ptr) {} + __host__ void operator delete(void *ptr) {} }; struct m_dhd { - __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} - __host__ __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} + __device__ void operator delete(void *ptr) {} + __host__ __device__ void operator delete(void *ptr) {} }; struct m_hdd { - __host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} - __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} + __host__ __device__ void operator delete(void *ptr) {} + __device__ void operator delete(void *ptr) {} }; // __global__ functions can't be overloaded based on attribute Index: test/SemaCUDA/host-device-constexpr.cu =================================================================== --- /dev/null +++ test/SemaCUDA/host-device-constexpr.cu @@ -0,0 +1,65 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s -fcuda-is-device + +#include "Inputs/cuda.h" + +// Opaque types used to determine which overload we're invoking. +struct HostReturnTy {}; +struct DeviceReturnTy {}; +struct HostDeviceReturnTy {}; + +// These shouldn't become host+device because they already have attributes. +__host__ constexpr int HostOnly() { return 0; } +// expected-note@-1 0+ {{not viable}} +__device__ constexpr int DeviceOnly() { return 0; } +// expected-note@-1 0+ {{not viable}} + +__host__ HostReturnTy Overloaded1(); +constexpr HostDeviceReturnTy Overloaded1() { return HostDeviceReturnTy(); } + +__device__ DeviceReturnTy Overloaded2(); +constexpr HostDeviceReturnTy Overloaded2() { return HostDeviceReturnTy(); } + +__host__ void HostFn() { + HostOnly(); + DeviceOnly(); // expected-error {{no matching function}} + HostReturnTy x = Overloaded1(); + HostDeviceReturnTy y = Overloaded2(); +} + +__device__ void DeviceFn() { + HostOnly(); // expected-error {{no matching function}} + DeviceOnly(); + HostDeviceReturnTy x = Overloaded1(); + DeviceReturnTy y = Overloaded2(); +} + +__host__ __device__ void HostDeviceFn() { +#ifdef __CUDA_ARCH__ + constexpr HostDeviceReturnTy x = Overloaded1(); + DeviceReturnTy y = Overloaded2(); +#else + HostReturnTy x = Overloaded1(); + constexpr HostDeviceReturnTy y = Overloaded2(); +#endif +} + +// Check that a constexpr function can overload a __device__ function, and +// that, in particular, we don't get errors if one of them is static and the +// other isn't. +static __device__ void f1(); +constexpr void f1(); + +__device__ void f2(); +static constexpr void f2(); + +// Different potential error depending on the order of declaration. +constexpr void f3(); +static __device__ void f3(); + +static constexpr void f4(); +__device__ void f4(); + +// Variadic device functions are not allowed, so this is just treated as +// host-only. +constexpr void variadic(const char*, ...); Index: test/SemaCUDA/no-host-device-constexpr.cu =================================================================== --- /dev/null +++ test/SemaCUDA/no-host-device-constexpr.cu @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fno-cuda-host-device-constexpr -verify %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fno-cuda-host-device-constexpr -fcuda-is-device -verify %s + +#include "Inputs/cuda.h" + +// Check that, with -fno-cuda-host-device-constexpr, constexpr functions are +// host-only, and __device__ constexpr functions are still device-only. + +constexpr int f() { return 0; } // expected-note {{not viable}} +__device__ constexpr int g() { return 0; } // expected-note {{not viable}} + +void __device__ foo() { + f(); // expected-error {{no matching function}} + g(); +} + +void __host__ foo() { + f(); + g(); // expected-error {{no matching function}} +}