Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -11667,6 +11667,8 @@ return IdentifyCUDATarget(dyn_cast(CurContext)); } + bool IsCUDAImplicitHostDeviceFunction(const FunctionDecl *D); + // CUDA function call preference. Must be ordered numerically from // worst to best. enum CUDAFunctionPreference { Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -211,6 +211,21 @@ llvm_unreachable("All cases should've been handled by now."); } +template static bool hasImplicitAttr(const FunctionDecl *D) { + if (!D) + return false; + if (auto *A = D->getAttr()) + if (A->isImplicit()) + return true; + return D->isImplicit(); +} + +bool Sema::IsCUDAImplicitHostDeviceFunction(const FunctionDecl *D) { + bool IsImplicitDevAttr = hasImplicitAttr(D); + bool IsImplicitHostAttr = hasImplicitAttr(D); + return IsImplicitDevAttr && IsImplicitHostAttr; +} + void Sema::EraseUnwantedCUDAMatches( const FunctionDecl *Caller, SmallVectorImpl> &Matches) { Index: clang/lib/Sema/SemaOverload.cpp =================================================================== --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -9517,11 +9517,27 @@ // in global variable initializers once proper context is added. if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { if (FunctionDecl *Caller = dyn_cast(S.CurContext)) { + bool IsCallerImplicitHD = S.IsCUDAImplicitHostDeviceFunction(Caller); + bool IsCand1ImplicitHD = + S.IsCUDAImplicitHostDeviceFunction(Cand1.Function); + bool IsCand2ImplicitHD = + S.IsCUDAImplicitHostDeviceFunction(Cand2.Function); auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function); auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function); assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never); - auto Cand1Emittable = P1 > Sema::CFP_WrongSide; - auto Cand2Emittable = P2 > Sema::CFP_WrongSide; + // The implicit HD function may be a function in a system header which + // is forced by pragma. In device compilation, if we prefer HD candidates + // over wrong-sided candidates, overloading resolution may change, which + // may result in non-deferrable diagnostics. As a workaround, we let + // implicit HD candidates take equal preference as wrong-sided candidates. + // This will preserve the overloading resolution. + auto EmitThreshold = + (S.getLangOpts().CUDAIsDevice && IsCallerImplicitHD && + (IsCand1ImplicitHD || IsCand2ImplicitHD)) + ? Sema::CFP_HostDevice + : Sema::CFP_WrongSide; + auto Cand1Emittable = P1 > EmitThreshold; + auto Cand2Emittable = P2 > EmitThreshold; if (Cand1Emittable && !Cand2Emittable) return true; if (!Cand1Emittable && Cand2Emittable) Index: clang/test/SemaCUDA/function-overload.cu =================================================================== --- clang/test/SemaCUDA/function-overload.cu +++ clang/test/SemaCUDA/function-overload.cu @@ -1,8 +1,8 @@ // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s -// RUN: %clang_cc1 -std=c++11 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s +// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -std=c++14 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s #include "Inputs/cuda.h" @@ -463,3 +463,72 @@ void foo() { __test(); } + +// Test resolving implicit host device candidate vs wrong-sided candidate. +// In device compilation, implicit host device caller choose implicit host +// device candidate and wrong-sided candidate with equal preference. +namespace ImplicitHostDeviceVsWrongSided { +inline double callee(double x); +#pragma clang force_cuda_host_device begin +inline void callee(int x); +inline double implicit_hd_caller() { + return callee(1.0); +} +#pragma clang force_cuda_host_device end +} + +// Test resolving implicit host device candidate vs wrong-sided candidate. +// In host compilation, implicit host device caller choose implicit host +// device candidate and same-sided candidate with equal preference. +namespace ImplicitHostDeviceVsSameSide { +inline void callee(int x); +#pragma clang force_cuda_host_device begin +inline double callee(double x); +inline double implicit_hd_caller() { + return callee(1.0); +} +#pragma clang force_cuda_host_device end +} + +// In the implicit host device function 'caller', the second 'callee' should be +// since it has better match, even though it is an implicit host device function +// whereas the first 'callee' is a host function. A diagnostic will be emitted +// if the first 'callee' is chosen since deduced return type cannot be used +// before it is defined. +namespace ImplicitHostDeviceByConstExpr { +template a b; +auto callee(...); +template constexpr auto callee(d) -> decltype(0); +struct e { + template static auto g(ad, f...) { + return h)...>; + } + struct i { + template static constexpr auto caller(f... k) { + return callee(k...); + } + }; + template static auto h() { + return i::caller; + } +}; +class l { + l() { + e::g([] {}, this); + } +}; +} + +// Test resolving explicit host device candidate vs. wrong-sided candidate. +// Explicit host device caller favors host device candidate against wrong-sided +// candidate. +namespace ExplicitHostDeviceVsWrongSided { +inline double callee(double x); +inline __host__ __device__ void callee(int x); +inline __host__ __device__ double explicit_hd_caller() { + return callee(1.0); +#if __CUDA_ARCH__ + // expected-error@-2 {{cannot initialize return object of type 'double' with an rvalue of type 'void'}} +#endif +} +}