Index: cfe/trunk/include/clang/Sema/Sema.h =================================================================== --- cfe/trunk/include/clang/Sema/Sema.h +++ cfe/trunk/include/clang/Sema/Sema.h @@ -9420,7 +9420,8 @@ /// /// Use this rather than examining the function's attributes yourself -- you /// will get it wrong. Returns CFT_Host if D is null. - CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D); + CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D, + bool IgnoreImplicitHDAttr = false); CUDAFunctionTarget IdentifyCUDATarget(const AttributeList *Attr); /// Gets the CUDA target for the current context. @@ -9522,7 +9523,10 @@ /// Check whether NewFD is a valid overload for CUDA. Emits /// diagnostics and invalidates NewFD if not. - void checkCUDATargetOverload(FunctionDecl *NewFD, LookupResult &Previous); + void checkCUDATargetOverload(FunctionDecl *NewFD, + const LookupResult &Previous); + /// Copies target attributes from the template TD to the function FD. + void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD); /// \name Code completion //@{ Index: cfe/trunk/lib/Sema/SemaCUDA.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaCUDA.cpp +++ cfe/trunk/lib/Sema/SemaCUDA.cpp @@ -93,8 +93,17 @@ return CFT_Host; } +template +static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { + return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { + return isa(Attribute) && + !(IgnoreImplicitAttr && Attribute->isImplicit()); + }); +} + /// IdentifyCUDATarget - Determine the CUDA compilation target for this function -Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) { +Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, + bool IgnoreImplicitHDAttr) { // Code that lives outside a function is run on the host. if (D == nullptr) return CFT_Host; @@ -105,13 +114,13 @@ if (D->hasAttr()) return CFT_Global; - if (D->hasAttr()) { - if (D->hasAttr()) + if (hasAttr(D, IgnoreImplicitHDAttr)) { + if (hasAttr(D, IgnoreImplicitHDAttr)) return CFT_HostDevice; return CFT_Device; - } else if (D->hasAttr()) { + } else if (hasAttr(D, IgnoreImplicitHDAttr)) { return CFT_Host; - } else if (D->isImplicit()) { + } else if (D->isImplicit() && !IgnoreImplicitHDAttr) { // Some implicit declarations (like intrinsic functions) are not marked. // Set the most lenient target on them for maximal flexibility. return CFT_HostDevice; @@ -856,7 +865,7 @@ } void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, - LookupResult &Previous) { + const LookupResult &Previous) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); for (NamedDecl *OldND : Previous) { @@ -883,3 +892,21 @@ } } } + +template +static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, + const FunctionDecl &TemplateFD) { + if (AttrTy *Attribute = TemplateFD.getAttr()) { + AttrTy *Clone = Attribute->clone(S.Context); + Clone->setInherited(true); + FD->addAttr(Clone); + } +} + +void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, + const FunctionTemplateDecl &TD) { + const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); + copyAttrIfPresent(*this, FD, TemplateFD); + copyAttrIfPresent(*this, FD, TemplateFD); + copyAttrIfPresent(*this, FD, TemplateFD); +} Index: cfe/trunk/lib/Sema/SemaDecl.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaDecl.cpp +++ cfe/trunk/lib/Sema/SemaDecl.cpp @@ -8305,9 +8305,6 @@ // Handle attributes. ProcessDeclAttributes(S, NewFD, D); - if (getLangOpts().CUDA) - maybeAddCUDAHostDeviceAttrs(NewFD, Previous); - if (getLangOpts().OpenCL) { // OpenCL v1.1 s6.5: Using an address space qualifier in a function return // type declaration will generate a compilation error. @@ -8410,6 +8407,15 @@ TemplateArgs.setRAngleLoc(D.getIdentifierLoc()); } + // We do not add HD attributes to specializations here because + // they may have different constexpr-ness compared to their + // templates and, after maybeAddCUDAHostDeviceAttrs() is applied, + // may end up with different effective targets. Instead, a + // specialization inherits its target attributes from its template + // in the CheckFunctionTemplateSpecialization() call below. + if (getLangOpts().CUDA & !isFunctionTemplateSpecialization) + maybeAddCUDAHostDeviceAttrs(NewFD, Previous); + // If it's a friend (and only if it's a friend), it's possible // that either the specialized function type or the specialized // template is dependent, and therefore matching will fail. In Index: cfe/trunk/lib/Sema/SemaTemplate.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaTemplate.cpp +++ cfe/trunk/lib/Sema/SemaTemplate.cpp @@ -7043,13 +7043,15 @@ continue; } - // Target attributes are part of function signature during cuda - // compilation, so deduced template must also have matching CUDA - // target. Given that regular template deduction does not take - // target attributes into account, we perform target match check - // here and reject candidates that have different target. + // Target attributes are part of the cuda function signature, so + // the deduced template's cuda target must match that of the + // specialization. Given that C++ template deduction does not + // take target attributes into account, we reject candidates + // here that have a different target. if (LangOpts.CUDA && - IdentifyCUDATarget(Specialization) != IdentifyCUDATarget(FD)) { + IdentifyCUDATarget(Specialization, + /* IgnoreImplicitHDAttributes = */ true) != + IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttributes = */ true)) { FailedCandidates.addCandidate().set( I.getPair(), FunTmpl->getTemplatedDecl(), MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info)); @@ -7166,6 +7168,14 @@ SpecInfo->getTemplateSpecializationKind(), ExplicitTemplateArgs ? &ConvertedTemplateArgs[Specialization] : nullptr); + // A function template specialization inherits the target attributes + // of its template. (We require the attributes explicitly in the + // code to match, but a template may have implicit attributes by + // virtue e.g. of being constexpr, and it passes these implicit + // attributes on to its specializations.) + if (LangOpts.CUDA) + inheritCUDATargetAttrs(FD, *Specialization->getPrimaryTemplate()); + // The "previous declaration" for this function template specialization is // the prior function template specialization. Previous.clear(); @@ -8154,24 +8164,19 @@ continue; } - // Target attributes are part of function signature during cuda - // compilation, so deduced template must also have matching CUDA - // target. Given that regular template deduction does not take it - // into account, we perform target match check here and reject - // candidates that have different target. - if (LangOpts.CUDA) { - CUDAFunctionTarget DeclaratorTarget = IdentifyCUDATarget(Attr); - // We need to adjust target when HD is forced by - // #pragma clang force_cuda_host_device - if (ForceCUDAHostDeviceDepth > 0 && - (DeclaratorTarget == CFT_Device || DeclaratorTarget == CFT_Host)) - DeclaratorTarget = CFT_HostDevice; - if (IdentifyCUDATarget(Specialization) != DeclaratorTarget) { - FailedCandidates.addCandidate().set( - P.getPair(), FunTmpl->getTemplatedDecl(), - MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info)); - continue; - } + // Target attributes are part of the cuda function signature, so + // the cuda target of the instantiated function must match that of its + // template. Given that C++ template deduction does not take + // target attributes into account, we reject candidates here that + // have a different target. + if (LangOpts.CUDA && + IdentifyCUDATarget(Specialization, + /* IgnoreImplicitHDAttributes = */ true) != + IdentifyCUDATarget(Attr)) { + FailedCandidates.addCandidate().set( + P.getPair(), FunTmpl->getTemplatedDecl(), + MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info)); + continue; } Matches.addDecl(Specialization, P.getAccess()); Index: cfe/trunk/test/SemaCUDA/function-template-overload.cu =================================================================== --- cfe/trunk/test/SemaCUDA/function-template-overload.cu +++ cfe/trunk/test/SemaCUDA/function-template-overload.cu @@ -31,7 +31,8 @@ template <> __host__ HType overload_h_d(long a); // OK. instantiates H -// Can't overload HD template with H or D template, though functions are OK. +// Can't overload HD template with H or D template, though +// non-template functions are OK. template __host__ __device__ HDType overload_hd(T a) { return HDType(); } // expected-note@-1 {{previous declaration is here}} // expected-note@-2 2 {{candidate template ignored: could not match 'HDType' against 'HType'}} @@ -56,24 +57,54 @@ template __host__ __device__ HDType overload_h_d2(T a) { return HDType(); } template __device__ DType overload_h_d2(T1 a) { T1 x; T2 y; return DType(); } +// constexpr functions are implicitly HD, but explicit +// instantiation/specialization must use target attributes as written. +template constexpr T overload_ce_implicit_hd(T a) { return a+1; } +// expected-note@-1 3 {{candidate template ignored: target attributes do not match}} + +// These will not match the template. +template __host__ __device__ int overload_ce_implicit_hd(int a); +// expected-error@-1 {{explicit instantiation of 'overload_ce_implicit_hd' does not refer to a function template, variable template, member function, member class, or static data member}} +template <> __host__ __device__ long overload_ce_implicit_hd(long a); +// expected-error@-1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}} +template <> __host__ __device__ constexpr long overload_ce_implicit_hd(long a); +// expected-error@-1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}} + +// These should work, because template matching ignores the implicit +// HD attributes the compiler gives to constexpr functions/templates, +// so 'overload_ce_implicit_hd' template will match __host__ functions +// only. +template __host__ int overload_ce_implicit_hd(int a); +template <> __host__ long overload_ce_implicit_hd(long a); + +template float overload_ce_implicit_hd(float a); +template <> float* overload_ce_implicit_hd(float *a); +template <> constexpr double overload_ce_implicit_hd(double a) { return a + 3.0; }; + __host__ void hf() { overload_hd(13); + overload_ce_implicit_hd('h'); // Implicitly instantiated + overload_ce_implicit_hd(1.0f); // Explicitly instantiated + overload_ce_implicit_hd(2.0); // Explicitly specialized HType h = overload_h_d(10); HType h2i = overload_h_d2(11); HType h2ii = overload_h_d2(12); // These should be implicitly instantiated from __host__ template returning HType. - DType d = overload_h_d(20); // expected-error {{no viable conversion from 'HType' to 'DType'}} - DType d2i = overload_h_d2(21); // expected-error {{no viable conversion from 'HType' to 'DType'}} + DType d = overload_h_d(20); // expected-error {{no viable conversion from 'HType' to 'DType'}} + DType d2i = overload_h_d2(21); // expected-error {{no viable conversion from 'HType' to 'DType'}} DType d2ii = overload_h_d2(22); // expected-error {{no viable conversion from 'HType' to 'DType'}} } __device__ void df() { overload_hd(23); + overload_ce_implicit_hd('d'); // Implicitly instantiated + overload_ce_implicit_hd(1.0f); // Explicitly instantiated + overload_ce_implicit_hd(2.0); // Explicitly specialized // These should be implicitly instantiated from __device__ template returning DType. - HType h = overload_h_d(10); // expected-error {{no viable conversion from 'DType' to 'HType'}} - HType h2i = overload_h_d2(11); // expected-error {{no viable conversion from 'DType' to 'HType'}} + HType h = overload_h_d(10); // expected-error {{no viable conversion from 'DType' to 'HType'}} + HType h2i = overload_h_d2(11); // expected-error {{no viable conversion from 'DType' to 'HType'}} HType h2ii = overload_h_d2(12); // expected-error {{no viable conversion from 'DType' to 'HType'}} DType d = overload_h_d(20);