Index: include/clang/Basic/LangOptions.def =================================================================== --- include/clang/Basic/LangOptions.def +++ include/clang/Basic/LangOptions.def @@ -175,6 +175,7 @@ LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)") LANGOPT(CUDATargetOverloads, 1, 0, "Enable function overloads based on CUDA target attributes") LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "Allow variadic functions in CUDA device code") +LANGOPT(CUDARelaxedConstexpr, 1, 0, "Treat constexpr functions as __host__ __device__") LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators") LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions") Index: include/clang/Driver/CC1Options.td =================================================================== --- include/clang/Driver/CC1Options.td +++ include/clang/Driver/CC1Options.td @@ -699,6 +699,8 @@ HelpText<"Enable function overloads based on CUDA target attributes.">; def fcuda_allow_variadic_functions : Flag<["-"], "fcuda-allow-variadic-functions">, HelpText<"Allow variadic functions in CUDA device code.">; +def fcuda_relaxed_constexpr : Flag<["-"], "fcuda-relaxed-constexpr">, + HelpText<"Treat constexpr functions as __host__ __device__.">; //===----------------------------------------------------------------------===// // OpenMP Options Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -3594,6 +3594,7 @@ CmdArgs.push_back(Args.MakeArgString(AuxToolChain->getTriple().str())); CmdArgs.push_back("-fcuda-target-overloads"); CmdArgs.push_back("-fcuda-disable-target-call-checks"); + CmdArgs.push_back("-fcuda-relaxed-constexpr"); } if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm || Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -1569,6 +1569,9 @@ if (Args.hasArg(OPT_fcuda_allow_variadic_functions)) Opts.CUDAAllowVariadicFunctions = 1; + if (Args.hasArg(OPT_fcuda_relaxed_constexpr)) + Opts.CUDARelaxedConstexpr = 1; + if (Opts.ObjC1) { if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) { StringRef value = arg->getValue(); Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -8006,6 +8006,15 @@ // Handle attributes. ProcessDeclAttributes(S, NewFD, D); + // With -fcuda-relaxed-constexpr, constexpr functions are treated as + // implicitly __host__ __device__. Device-side variadic functions are not + // allowed, so we just treat those as host-only. + if (getLangOpts().CUDA && NewFD->isConstexpr() && !NewFD->isVariadic() && + getLangOpts().CUDARelaxedConstexpr) { + NewFD->addAttr(CUDAHostAttr::CreateImplicit(Context)); + NewFD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + } + if (getLangOpts().OpenCL) { // OpenCL v1.1 s6.5: Using an address space qualifier in a function return // type declaration will generate a compilation error. Index: lib/Sema/SemaOverload.cpp =================================================================== --- lib/Sema/SemaOverload.cpp +++ lib/Sema/SemaOverload.cpp @@ -1126,13 +1126,10 @@ assert((OldTarget != CFT_InvalidTarget) && "Unexpected invalid target."); - // Don't allow mixing of HD with other kinds. This guarantees that - // we have only one viable function with this signature on any - // side of CUDA compilation . - // __global__ functions can't be overloaded based on attribute - // difference because, like HD, they also exist on both sides. - if ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) || - (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) + // Don't allow __global__ functions to be overloaded with other functions, + // based solely on their CUDA attributes. This guarantees that we have only + // one viable function with this signature on any side of CUDA compilation. + if ((NewTarget == CFT_Global) || (OldTarget == CFT_Global)) return false; // Allow overloading of functions with same signature, but Index: test/SemaCUDA/function-overload.cu =================================================================== --- test/SemaCUDA/function-overload.cu +++ test/SemaCUDA/function-overload.cu @@ -49,22 +49,18 @@ __host__ HostReturnTy dh() { return HostReturnTy(); } __device__ DeviceReturnTy dh() { return DeviceReturnTy(); } -// H/HD and D/HD are not allowed. -__host__ __device__ int hdh() { return 0; } // expected-note {{previous definition is here}} -__host__ int hdh() { return 0; } // expected-error {{redefinition of 'hdh'}} +// H/HD and D/HD are also OK. +__host__ __device__ HostDeviceReturnTy hdh() { return HostDeviceReturnTy(); } +__host__ HostReturnTy hdh() { return HostReturnTy(); } -__host__ int hhd() { return 0; } // expected-note {{previous definition is here}} -__host__ __device__ int hhd() { return 0; } // expected-error {{redefinition of 'hhd'}} -// expected-warning@-1 {{attribute declaration must precede definition}} -// expected-note@-3 {{previous definition is here}} +__host__ HostReturnTy hhd() { return HostReturnTy(); } +__host__ __device__ HostDeviceReturnTy hhd() { return HostDeviceReturnTy(); } -__host__ __device__ int hdd() { return 0; } // expected-note {{previous definition is here}} -__device__ int hdd() { return 0; } // expected-error {{redefinition of 'hdd'}} +__host__ __device__ HostDeviceReturnTy hdd() { return HostDeviceReturnTy(); } +__device__ DeviceReturnTy hdd() { return DeviceReturnTy(); } -__device__ int dhd() { return 0; } // expected-note {{previous definition is here}} -__host__ __device__ int dhd() { return 0; } // expected-error {{redefinition of 'dhd'}} -// expected-warning@-1 {{attribute declaration must precede definition}} -// expected-note@-3 {{previous definition is here}} +__device__ DeviceReturnTy dhd() { return DeviceReturnTy(); } +__host__ __device__ HostDeviceReturnTy dhd() { return HostDeviceReturnTy(); } // Same tests for extern "C" functions. extern "C" __host__ int chh() { return 0; } // expected-note {{previous definition is here}} @@ -74,14 +70,12 @@ extern "C" __device__ DeviceReturnTy cdh() { return DeviceReturnTy(); } extern "C" __host__ HostReturnTy cdh() { return HostReturnTy(); } -// H/HD and D/HD overloading is not allowed. -extern "C" __host__ __device__ int chhd1() { return 0; } // expected-note {{previous definition is here}} -extern "C" __host__ int chhd1() { return 0; } // expected-error {{redefinition of 'chhd1'}} +// H/HD and D/HD overloading is OK. +extern "C" __host__ __device__ HostDeviceReturnTy chhd() { return HostDeviceReturnTy(); } +extern "C" __host__ HostReturnTy chhd() { return HostReturnTy(); } -extern "C" __host__ int chhd2() { return 0; } // expected-note {{previous definition is here}} -extern "C" __host__ __device__ int chhd2() { return 0; } // expected-error {{redefinition of 'chhd2'}} -// expected-warning@-1 {{attribute declaration must precede definition}} -// expected-note@-3 {{previous definition is here}} +extern "C" __host__ __device__ HostDeviceReturnTy chdd() { return HostDeviceReturnTy(); } +extern "C" __device__ DeviceReturnTy chdd() { return DeviceReturnTy(); } // Helper functions to verify calling restrictions. __device__ DeviceReturnTy d() { return DeviceReturnTy(); } @@ -128,6 +122,16 @@ HostFnPtr fp_cdh = cdh; HostReturnTy ret_cdh = cdh(); + HostFnPtr fp_hdh = hdh; + HostReturnTy ret_hdh = hdh(); + HostFnPtr fp_chhd = chhd; + HostReturnTy ret_chhd = chhd(); + + HostDeviceFnPtr fp_hdd = hdd; + HostDeviceReturnTy ret_hdd = hdd(); + HostDeviceFnPtr fp_chdd = chdd; + HostDeviceReturnTy ret_chdd = chdd(); + GlobalFnPtr fp_g = g; g(); // expected-error {{call to global function g not configured}} g<<<0, 0>>>(); @@ -149,6 +153,16 @@ DeviceFnPtr fp_cdh = cdh; DeviceReturnTy ret_cdh = cdh(); + HostDeviceFnPtr fp_hdh = hdh; + HostDeviceReturnTy ret_hdh = hdh(); + HostDeviceFnPtr fp_chhd = chhd; + HostDeviceReturnTy ret_chhd = chhd(); + + DeviceFnPtr fp_hdd = hdd; + DeviceReturnTy ret_hdd = hdd(); + DeviceFnPtr fp_chdd = chdd; + DeviceReturnTy ret_chdd = chdd(); + GlobalFnPtr fp_g = g; // expected-error {{reference to __global__ function 'g' in __device__ function}} g(); // expected-error {{no matching function for call to 'g'}} g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __device__ function}} @@ -170,6 +184,16 @@ DeviceFnPtr fp_cdh = cdh; DeviceReturnTy ret_cdh = cdh(); + HostDeviceFnPtr fp_hdh = hdh; + HostDeviceReturnTy ret_hdh = hdh(); + HostDeviceFnPtr fp_chhd = chhd; + HostDeviceReturnTy ret_chhd = chhd(); + + DeviceFnPtr fp_hdd = hdd; + DeviceReturnTy ret_hdd = hdd(); + DeviceFnPtr fp_chdd = chdd; + DeviceReturnTy ret_chdd = chdd(); + GlobalFnPtr fp_g = g; // expected-error {{reference to __global__ function 'g' in __global__ function}} g(); // expected-error {{no matching function for call to 'g'}} g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __global__ function}} @@ -203,6 +227,30 @@ CurrentFnPtr fp_cdh = cdh; CurrentReturnTy ret_cdh = cdh(); + // HDOrHostFoo is HostFoo if we're doing host compilation, and HDFoo + // otherwise. +#ifdef __CUDA_ARCH__ + typedef HostDeviceReturnTy HDOrHostReturnTy; + typedef HostDeviceFnPtr HDOrHostFnPtr; + typedef DeviceReturnTy HDOrDeviceReturnTy; + typedef DeviceFnPtr HDOrDeviceFnPtr; +#else + typedef HostReturnTy HDOrHostReturnTy; + typedef HostFnPtr HDOrHostFnPtr; + typedef HostDeviceReturnTy HDOrDeviceReturnTy; + typedef HostDeviceFnPtr HDOrDeviceFnPtr; +#endif + + HDOrHostFnPtr fp_hdh = hdh; + HDOrHostReturnTy ret_hdh = hdh(); + HDOrHostFnPtr fp_chhd = chhd; + HDOrHostReturnTy ret_chhd = chhd(); + + HDOrDeviceFnPtr fp_hdd = hdd; + HDOrDeviceReturnTy ret_hdd = hdd(); + HDOrDeviceFnPtr fp_chdd = chdd; + HDOrDeviceReturnTy ret_chdd = chdd(); + GlobalFnPtr fp_g = g; #if defined(__CUDA_ARCH__) // expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}} @@ -243,31 +291,31 @@ __host__ __device__ ~d_hd() {} }; -// Mixing H/D and HD is not allowed. +// Mixing H/D and HD is OK. struct d_dhhd { __device__ ~d_dhhd() {} - __host__ ~d_dhhd() {} // expected-note {{previous declaration is here}} - __host__ __device__ ~d_dhhd() {} // expected-error {{destructor cannot be redeclared}} + __host__ ~d_dhhd() {} + __host__ __device__ ~d_dhhd() {} }; struct d_hhd { - __host__ ~d_hhd() {} // expected-note {{previous declaration is here}} - __host__ __device__ ~d_hhd() {} // expected-error {{destructor cannot be redeclared}} + __host__ ~d_hhd() {} + __host__ __device__ ~d_hhd() {} }; struct d_hdh { - __host__ __device__ ~d_hdh() {} // expected-note {{previous declaration is here}} - __host__ ~d_hdh() {} // expected-error {{destructor cannot be redeclared}} + __host__ __device__ ~d_hdh() {} + __host__ ~d_hdh() {} }; struct d_dhd { - __device__ ~d_dhd() {} // expected-note {{previous declaration is here}} - __host__ __device__ ~d_dhd() {} // expected-error {{destructor cannot be redeclared}} + __device__ ~d_dhd() {} + __host__ __device__ ~d_dhd() {} }; struct d_hdd { - __host__ __device__ ~d_hdd() {} // expected-note {{previous declaration is here}} - __device__ ~d_hdd() {} // expected-error {{destructor cannot be redeclared}} + __host__ __device__ ~d_hdd() {} + __device__ ~d_hdd() {} }; // Test overloading of member functions @@ -288,23 +336,23 @@ }; struct m_hhd { - __host__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} - __host__ __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} + __host__ void operator delete(void *ptr) {} + __host__ __device__ void operator delete(void *ptr) {} }; struct m_hdh { - __host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} - __host__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} + __host__ __device__ void operator delete(void *ptr) {} + __host__ void operator delete(void *ptr) {} }; struct m_dhd { - __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} - __host__ __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} + __device__ void operator delete(void *ptr) {} + __host__ __device__ void operator delete(void *ptr) {} }; struct m_hdd { - __host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}} - __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}} + __host__ __device__ void operator delete(void *ptr) {} + __device__ void operator delete(void *ptr) {} }; // __global__ functions can't be overloaded based on attribute Index: test/SemaCUDA/relaxed-constexpr.cu =================================================================== --- /dev/null +++ test/SemaCUDA/relaxed-constexpr.cu @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s -fcuda-target-overloads -fcuda-relaxed-constexpr +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s -fcuda-target-overloads -fcuda-relaxed-constexpr -fcuda-is-device + +// expected-no-diagnostics + +#include "Inputs/cuda.h" + +static __device__ void f1(); +constexpr void f1(); + +__device__ void f2(); +static constexpr void f2(); + +// Different potential error depending on the order of declaration. +constexpr void f3(); +static __device__ void f3(); + +static constexpr void f4(); +__device__ void f4(); + +// Variadic device functions are not allowed, so this is just treated as +// host-only. +constexpr void variadic(const char*, ...);