Index: include/clang/Basic/LangOptions.def =================================================================== --- include/clang/Basic/LangOptions.def +++ include/clang/Basic/LangOptions.def @@ -175,6 +175,7 @@ LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)") LANGOPT(CUDATargetOverloads, 1, 0, "Enable function overloads based on CUDA target attributes") LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "Allow variadic functions in CUDA device code") +LANGOPT(CUDARelaxedConstexpr, 1, 0, "Treat constexpr functions as __host__ __device__") LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators") LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions") Index: include/clang/Driver/CC1Options.td =================================================================== --- include/clang/Driver/CC1Options.td +++ include/clang/Driver/CC1Options.td @@ -699,6 +699,8 @@ HelpText<"Enable function overloads based on CUDA target attributes.">; def fcuda_allow_variadic_functions : Flag<["-"], "fcuda-allow-variadic-functions">, HelpText<"Allow variadic functions in CUDA device code.">; +def fcuda_relaxed_constexpr : Flag<["-"], "fcuda-relaxed-constexpr">, + HelpText<"Treat constexpr functions as __host__ __device__.">; //===----------------------------------------------------------------------===// // OpenMP Options Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -3594,6 +3594,7 @@ CmdArgs.push_back(Args.MakeArgString(AuxToolChain->getTriple().str())); CmdArgs.push_back("-fcuda-target-overloads"); CmdArgs.push_back("-fcuda-disable-target-call-checks"); + CmdArgs.push_back("-fcuda-relaxed-constexpr"); } if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm || Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -1569,6 +1569,9 @@ if (Args.hasArg(OPT_fcuda_allow_variadic_functions)) Opts.CUDAAllowVariadicFunctions = 1; + if (Args.hasArg(OPT_fcuda_relaxed_constexpr)) + Opts.CUDARelaxedConstexpr = 1; + if (Opts.ObjC1) { if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) { StringRef value = arg->getValue(); Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -8006,6 +8006,15 @@ // Handle attributes. ProcessDeclAttributes(S, NewFD, D); + // With -fcuda-relaxed-constexpr, constexpr functions are treated as + // implicitly __host__ __device__. Device-side variadic functions are not + // allowed, so we just treat those as host-only. + if (getLangOpts().CUDA && NewFD->isConstexpr() && !NewFD->isVariadic() && + getLangOpts().CUDARelaxedConstexpr) { + NewFD->addAttr(CUDAHostAttr::CreateImplicit(Context)); + NewFD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + } + if (getLangOpts().OpenCL) { // OpenCL v1.1 s6.5: Using an address space qualifier in a function return // type declaration will generate a compilation error. Index: lib/Sema/SemaOverload.cpp =================================================================== --- lib/Sema/SemaOverload.cpp +++ lib/Sema/SemaOverload.cpp @@ -1126,13 +1126,10 @@ assert((OldTarget != CFT_InvalidTarget) && "Unexpected invalid target."); - // Don't allow mixing of HD with other kinds. This guarantees that - // we have only one viable function with this signature on any - // side of CUDA compilation . - // __global__ functions can't be overloaded based on attribute - // difference because, like HD, they also exist on both sides. - if ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) || - (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) + // Don't allow __global__ functions to be overloaded with other functions, + // based solely on their CUDA attributes. This guarantees that we have only + // one viable function with this signature on any side of CUDA compilation. + if ((NewTarget == CFT_Global) || (OldTarget == CFT_Global)) return false; // Allow overloading of functions with same signature, but Index: test/SemaCUDA/relaxed-constexpr.cu =================================================================== --- /dev/null +++ test/SemaCUDA/relaxed-constexpr.cu @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s -fcuda-target-overloads +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s -fcuda-target-overloads -fcuda-is-device + +// expected-no-diagnostics + +#include "Inputs/cuda.h" + +static __device__ void f1(); +constexpr void f1(); + +__device__ void f2(); +static constexpr void f2(); + +// Different potential error depending on the order of declaration. +constexpr void f3(); +static __device__ void f3(); + +static constexpr void f4(); +__device__ void f4(); + +// Variadic device functions are not allowed, so this is just treated as +// host-only. +constexpr void variadic(const char*, ...);