Index: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td +++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td @@ -6491,6 +6491,12 @@ def err_va_arg_in_device : Error< "CUDA device code does not support va_arg">; def err_alias_not_supported_on_nvptx : Error<"CUDA does not support aliases">; +def err_cuda_unattributed_constexpr_cannot_overload_device : Error< + "constexpr function '%0' without __host__ or __device__ attributes cannot " + "overload __device__ function with same signature. Add a __host__ " + "attribute, or build with -fno-cuda-host-device-constexpr.">; +def note_cuda_conflicting_device_function_declared_here : Note< + "conflicting __device__ function declared here">; def err_dynamic_var_init : Error< "dynamic initialization is not supported for " "__device__, __constant__, and __shared__ variables.">; Index: cfe/trunk/include/clang/Basic/LangOptions.def =================================================================== --- cfe/trunk/include/clang/Basic/LangOptions.def +++ cfe/trunk/include/clang/Basic/LangOptions.def @@ -172,6 +172,7 @@ LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device") LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA device code") +LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__") LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators") LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions") Index: cfe/trunk/include/clang/Driver/CC1Options.td =================================================================== --- cfe/trunk/include/clang/Driver/CC1Options.td +++ cfe/trunk/include/clang/Driver/CC1Options.td @@ -691,6 +691,8 @@ HelpText<"Incorporate CUDA device-side binary into host object file.">; def fcuda_allow_variadic_functions : Flag<["-"], "fcuda-allow-variadic-functions">, HelpText<"Allow variadic functions in CUDA device code.">; +def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr">, + HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">; //===----------------------------------------------------------------------===// // OpenMP Options Index: cfe/trunk/include/clang/Sema/Sema.h =================================================================== --- cfe/trunk/include/clang/Sema/Sema.h +++ cfe/trunk/include/clang/Sema/Sema.h @@ -2192,7 +2192,8 @@ const LookupResult &OldDecls, NamedDecl *&OldDecl, bool IsForUsingDecl); - bool IsOverload(FunctionDecl *New, FunctionDecl *Old, bool IsForUsingDecl); + bool IsOverload(FunctionDecl *New, FunctionDecl *Old, bool IsForUsingDecl, + bool ConsiderCudaAttrs = true); /// \brief Checks availability of the function depending on the current /// function context.Inside an unavailable function,unavailability is ignored. @@ -8904,6 +8905,11 @@ return IdentifyCUDAPreference(Caller, Callee) == CFP_Never; } + /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD, + /// depending on FD and the current compilation settings. + void maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *FD, + const LookupResult &Previous); + /// Finds a function in \p Matches with highest calling priority /// from \p Caller context and erases all functions with lower /// calling priority. Index: cfe/trunk/lib/Frontend/CompilerInvocation.cpp =================================================================== --- cfe/trunk/lib/Frontend/CompilerInvocation.cpp +++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp @@ -1560,6 +1560,9 @@ if (Args.hasArg(OPT_fcuda_allow_variadic_functions)) Opts.CUDAAllowVariadicFunctions = 1; + if (Args.hasArg(OPT_fno_cuda_host_device_constexpr)) + Opts.CUDAHostDeviceConstexpr = 0; + if (Opts.ObjC1) { if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) { StringRef value = arg->getValue(); Index: cfe/trunk/lib/Sema/SemaCUDA.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaCUDA.cpp +++ cfe/trunk/lib/Sema/SemaCUDA.cpp @@ -11,12 +11,14 @@ /// //===----------------------------------------------------------------------===// -#include "clang/Sema/Sema.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/ExprCXX.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Sema/Lookup.h" +#include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" +#include "clang/Sema/Template.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" using namespace clang; @@ -381,3 +383,50 @@ return true; } + +// With -fcuda-host-device-constexpr, an unattributed constexpr function is +// treated as implicitly __host__ __device__, unless: +// * it is a variadic function (device-side variadic functions are not +// allowed), or +// * a __device__ function with this signature was already declared, in which +// case in which case we output an error, unless the __device__ decl is in a +// system header, in which case we leave the constexpr function unattributed. +void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD, + const LookupResult &Previous) { + assert(getLangOpts().CUDA && "May be called only for CUDA compilations."); + if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || + NewD->isVariadic() || NewD->hasAttr() || + NewD->hasAttr() || NewD->hasAttr()) + return; + + // Is D a __device__ function with the same signature as NewD, ignoring CUDA + // attributes? + auto IsMatchingDeviceFn = [&](NamedDecl *D) { + if (UsingShadowDecl *Using = dyn_cast(D)) + D = Using->getTargetDecl(); + FunctionDecl *OldD = D->getAsFunction(); + return OldD && OldD->hasAttr() && + !OldD->hasAttr() && + !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false, + /* ConsiderCudaAttrs = */ false); + }; + auto It = llvm::find_if(Previous, IsMatchingDeviceFn); + if (It != Previous.end()) { + // We found a __device__ function with the same name and signature as NewD + // (ignoring CUDA attrs). This is an error unless that function is defined + // in a system header, in which case we simply return without making NewD + // host+device. + NamedDecl *Match = *It; + if (!getSourceManager().isInSystemHeader(Match->getLocation())) { + Diag(NewD->getLocation(), + diag::err_cuda_unattributed_constexpr_cannot_overload_device) + << NewD->getName(); + Diag(Match->getLocation(), + diag::note_cuda_conflicting_device_function_declared_here); + } + return; + } + + NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); + NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); +} Index: cfe/trunk/lib/Sema/SemaDecl.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaDecl.cpp +++ cfe/trunk/lib/Sema/SemaDecl.cpp @@ -8009,6 +8009,9 @@ // Handle attributes. ProcessDeclAttributes(S, NewFD, D); + if (getLangOpts().CUDA) + maybeAddCUDAHostDeviceAttrs(S, NewFD, Previous); + if (getLangOpts().OpenCL) { // OpenCL v1.1 s6.5: Using an address space qualifier in a function return // type declaration will generate a compilation error. Index: cfe/trunk/lib/Sema/SemaOverload.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaOverload.cpp +++ cfe/trunk/lib/Sema/SemaOverload.cpp @@ -992,7 +992,7 @@ } bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old, - bool UseMemberUsingDeclRules) { + bool UseMemberUsingDeclRules, bool ConsiderCudaAttrs) { // C++ [basic.start.main]p2: This function shall not be overloaded. if (New->isMain()) return false; @@ -1125,7 +1125,7 @@ return true; } - if (getLangOpts().CUDA) { + if (getLangOpts().CUDA && ConsiderCudaAttrs) { CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New), OldTarget = IdentifyCUDATarget(Old); if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global) Index: cfe/trunk/test/SemaCUDA/Inputs/overload.h =================================================================== --- cfe/trunk/test/SemaCUDA/Inputs/overload.h +++ cfe/trunk/test/SemaCUDA/Inputs/overload.h @@ -0,0 +1,8 @@ +// This header is used by tests which are interested in __device__ functions +// which appear in a system header. + +__device__ int OverloadMe(); + +namespace ns { +using ::OverloadMe; +} Index: cfe/trunk/test/SemaCUDA/host-device-constexpr.cu =================================================================== --- cfe/trunk/test/SemaCUDA/host-device-constexpr.cu +++ cfe/trunk/test/SemaCUDA/host-device-constexpr.cu @@ -0,0 +1,69 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s -fcuda-is-device + +#include "Inputs/cuda.h" + +// Declares one function and pulls it into namespace ns: +// +// __device__ int OverloadMe(); +// namespace ns { using ::OverloadMe; } +// +// Clang cares that this is done in a system header. +#include + +// Opaque type used to determine which overload we're invoking. +struct HostReturnTy {}; + +// These shouldn't become host+device because they already have attributes. +__host__ constexpr int HostOnly() { return 0; } +// expected-note@-1 0+ {{not viable}} +__device__ constexpr int DeviceOnly() { return 0; } +// expected-note@-1 0+ {{not viable}} + +constexpr int HostDevice() { return 0; } + +// This should be a host-only function, because there's a previous __device__ +// overload in . +constexpr HostReturnTy OverloadMe() { return HostReturnTy(); } + +namespace ns { +// The "using" statement in overload.h should prevent OverloadMe from being +// implicitly host+device. +constexpr HostReturnTy OverloadMe() { return HostReturnTy(); } +} // namespace ns + +// This is an error, because NonSysHdrOverload was not defined in a system +// header. +__device__ int NonSysHdrOverload() { return 0; } +// expected-note@-1 {{conflicting __device__ function declared here}} +constexpr int NonSysHdrOverload() { return 0; } +// expected-error@-1 {{constexpr function 'NonSysHdrOverload' without __host__ or __device__ attributes}} + +// Variadic device functions are not allowed, so this is just treated as +// host-only. +constexpr void Variadic(const char*, ...); +// expected-note@-1 {{call to __host__ function from __device__ function}} + +__host__ void HostFn() { + HostOnly(); + DeviceOnly(); // expected-error {{no matching function}} + HostReturnTy x = OverloadMe(); + HostReturnTy y = ns::OverloadMe(); + Variadic("abc", 42); +} + +__device__ void DeviceFn() { + HostOnly(); // expected-error {{no matching function}} + DeviceOnly(); + int x = OverloadMe(); + int y = ns::OverloadMe(); + Variadic("abc", 42); // expected-error {{no matching function}} +} + +__host__ __device__ void HostDeviceFn() { +#ifdef __CUDA_ARCH__ + int y = OverloadMe(); +#else + constexpr HostReturnTy y = OverloadMe(); +#endif +} Index: cfe/trunk/test/SemaCUDA/no-host-device-constexpr.cu =================================================================== --- cfe/trunk/test/SemaCUDA/no-host-device-constexpr.cu +++ cfe/trunk/test/SemaCUDA/no-host-device-constexpr.cu @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fno-cuda-host-device-constexpr -verify %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fno-cuda-host-device-constexpr -fcuda-is-device -verify %s + +#include "Inputs/cuda.h" + +// Check that, with -fno-cuda-host-device-constexpr, constexpr functions are +// host-only, and __device__ constexpr functions are still device-only. + +constexpr int f() { return 0; } // expected-note {{not viable}} +__device__ constexpr int g() { return 0; } // expected-note {{not viable}} + +void __device__ foo() { + f(); // expected-error {{no matching function}} + g(); +} + +void __host__ foo() { + f(); + g(); // expected-error {{no matching function}} +}