Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -11656,7 +11656,8 @@ /// Use this rather than examining the function's attributes yourself -- you /// will get it wrong. Returns CFT_Host if D is null. CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D, - bool IgnoreImplicitHDAttr = false); + bool IgnoreImplicitHDAttr = false, + bool *IsImplicitHDAttr = nullptr); CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs); /// Gets the CUDA target for the current context. @@ -11683,9 +11684,12 @@ /// nullptr in case of global context. /// \param Callee target function /// + /// \param IsImplicitHD callee is an implicit host device function + /// /// \returns preference value for particular Caller/Callee combination. CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller, - const FunctionDecl *Callee); + const FunctionDecl *Callee, + bool *IsImplicitHD = nullptr); /// Determines whether Caller may invoke Callee, based on their CUDA /// host/device attributes. Returns false if the call is not allowed. Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -95,17 +95,25 @@ return CFT_Host; } -template -static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { - return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { - return isa(Attribute) && - !(IgnoreImplicitAttr && Attribute->isImplicit()); - }); +template +static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr, + bool *IsImplicitHDAttr = nullptr) { + if (auto *A = D->getAttr()) { + if (A->isImplicit()) { + if (IsImplicitHDAttr) + *IsImplicitHDAttr = true; + if (IgnoreImplicitAttr) + return false; + } + return true; + } + return false; } /// IdentifyCUDATarget - Determine the CUDA compilation target for this function Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, - bool IgnoreImplicitHDAttr) { + bool IgnoreImplicitHDAttr, + bool *IsImplicitHDAttr) { // Code that lives outside a function is run on the host. if (D == nullptr) return CFT_Host; @@ -116,15 +124,23 @@ if (D->hasAttr()) return CFT_Global; - if (hasAttr(D, IgnoreImplicitHDAttr)) { - if (hasAttr(D, IgnoreImplicitHDAttr)) + bool IsImplicitDevAttr = false; + bool IsImplicitHostAttr = false; + if (hasAttr(D, IgnoreImplicitHDAttr, &IsImplicitDevAttr)) { + if (hasAttr(D, IgnoreImplicitHDAttr, &IsImplicitHostAttr)) { + assert(IsImplicitDevAttr == IsImplicitHostAttr); + if (IsImplicitHDAttr) + *IsImplicitHDAttr = IsImplicitDevAttr && IsImplicitHostAttr; return CFT_HostDevice; + } return CFT_Device; } else if (hasAttr(D, IgnoreImplicitHDAttr)) { return CFT_Host; } else if (D->isImplicit() && !IgnoreImplicitHDAttr) { // Some implicit declarations (like intrinsic functions) are not marked. // Set the most lenient target on them for maximal flexibility. + if (IsImplicitHDAttr) + *IsImplicitHDAttr = true; return CFT_HostDevice; } @@ -161,10 +177,12 @@ Sema::CUDAFunctionPreference Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, - const FunctionDecl *Callee) { + const FunctionDecl *Callee, bool *IsImplicitHD) { assert(Callee && "Callee must be valid."); CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); - CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); + CUDAFunctionTarget CalleeTarget = + IdentifyCUDATarget(Callee, + /*IgnoreImplicitHD=*/false, IsImplicitHD); // If one of the targets is invalid, the check always fails, no matter what // the other target is. Index: clang/lib/Sema/SemaOverload.cpp =================================================================== --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -9517,11 +9517,28 @@ // in global variable initializers once proper context is added. if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { if (FunctionDecl *Caller = dyn_cast(S.CurContext)) { - auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function); - auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function); + bool IsCallerImplicitHD = false; + bool IsCand1ImplicitHD = false; + bool IsCand2ImplicitHD = false; + S.IdentifyCUDATarget(Caller, /*IgnoreImplicitHD=*/false, + &IsCallerImplicitHD); + auto P1 = + S.IdentifyCUDAPreference(Caller, Cand1.Function, &IsCand1ImplicitHD); + auto P2 = + S.IdentifyCUDAPreference(Caller, Cand2.Function, &IsCand2ImplicitHD); assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never); - auto Cand1Emittable = P1 > Sema::CFP_WrongSide; - auto Cand2Emittable = P2 > Sema::CFP_WrongSide; + // The implicit HD function may be a function in a system header which + // is forced by pragma. If we prefer HD candidates over wrong-sided + // candidates, overloading resolution may change, which may result in + // non-deferrable diagnostics. As a workaround, we let implicit HD + // candidates take equal preference as wrong-sided candidates. This will + // preserve the overloading resolution. + auto EmitThreshold = + (IsCallerImplicitHD && (IsCand1ImplicitHD || IsCand2ImplicitHD)) + ? Sema::CFP_HostDevice + : Sema::CFP_WrongSide; + auto Cand1Emittable = P1 > EmitThreshold; + auto Cand2Emittable = P2 > EmitThreshold; if (Cand1Emittable && !Cand2Emittable) return true; if (!Cand1Emittable && Cand2Emittable) Index: clang/test/SemaCUDA/function-overload.cu =================================================================== --- clang/test/SemaCUDA/function-overload.cu +++ clang/test/SemaCUDA/function-overload.cu @@ -463,3 +463,30 @@ void foo() { __test(); } + +// Test resolving implicit host device candidate vs wrong-sided candidate. +// Implicit host device caller choose implicit host device candidate and +// wrong-sided candidate with equal preference. +#ifdef __CUDA_ARCH__ +namespace ImplicitHostDeviceVsWrongSided { +inline double callee(double x); +#pragma clang force_cuda_host_device begin +inline void callee(int x); +inline double implicit_hd_caller() { + return callee(1.0); +} +#pragma clang force_cuda_host_device end +} + +// Test resolving explicit host device candidate vs. wrong-sided candidate. +// Explicit host device caller favors host device candidate against wrong-sided +// candidate. +namespace ExplicitHostDeviceVsWrongSided { +inline double callee(double x); +inline __host__ __device__ void callee(int x); +inline __host__ __device__ double explicit_hd_caller() { + return callee(1.0); + // expected-error@-1 {{cannot initialize return object of type 'double' with an rvalue of type 'void'}} +} +} +#endif