Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -11671,6 +11671,8 @@ return IdentifyCUDATarget(dyn_cast(CurContext)); } + static bool IsCUDAImplicitHostDeviceFunction(const FunctionDecl *D); + // CUDA function call preference. Must be ordered numerically from // worst to best. enum CUDAFunctionPreference { Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -211,6 +211,20 @@ llvm_unreachable("All cases should've been handled by now."); } +template static bool hasImplicitAttr(const FunctionDecl *D) { + if (!D) + return false; + if (auto *A = D->getAttr()) + return A->isImplicit(); + return D->isImplicit(); +} + +bool Sema::IsCUDAImplicitHostDeviceFunction(const FunctionDecl *D) { + bool IsImplicitDevAttr = hasImplicitAttr(D); + bool IsImplicitHostAttr = hasImplicitAttr(D); + return IsImplicitDevAttr && IsImplicitHostAttr; +} + void Sema::EraseUnwantedCUDAMatches( const FunctionDecl *Caller, SmallVectorImpl> &Matches) { Index: clang/lib/Sema/SemaOverload.cpp =================================================================== --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -9517,11 +9517,27 @@ // in global variable initializers once proper context is added. if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { if (FunctionDecl *Caller = dyn_cast(S.CurContext)) { + bool IsCallerImplicitHD = Sema::IsCUDAImplicitHostDeviceFunction(Caller); + bool IsCand1ImplicitHD = + Sema::IsCUDAImplicitHostDeviceFunction(Cand1.Function); + bool IsCand2ImplicitHD = + Sema::IsCUDAImplicitHostDeviceFunction(Cand2.Function); auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function); auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function); assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never); - auto Cand1Emittable = P1 > Sema::CFP_WrongSide; - auto Cand2Emittable = P2 > Sema::CFP_WrongSide; + // The implicit HD function may be a function in a system header which + // is forced by pragma. In device compilation, if we prefer HD candidates + // over wrong-sided candidates, overloading resolution may change, which + // may result in non-deferrable diagnostics. As a workaround, we let + // implicit HD candidates take equal preference as wrong-sided candidates. + // This will preserve the overloading resolution. + auto EmitThreshold = + (S.getLangOpts().CUDAIsDevice && IsCallerImplicitHD && + (IsCand1ImplicitHD || IsCand2ImplicitHD)) + ? Sema::CFP_HostDevice + : Sema::CFP_WrongSide; + auto Cand1Emittable = P1 > EmitThreshold; + auto Cand2Emittable = P2 > EmitThreshold; if (Cand1Emittable && !Cand2Emittable) return true; if (!Cand1Emittable && Cand2Emittable) Index: clang/test/SemaCUDA/function-overload.cu =================================================================== --- clang/test/SemaCUDA/function-overload.cu +++ clang/test/SemaCUDA/function-overload.cu @@ -1,8 +1,8 @@ // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s -// RUN: %clang_cc1 -std=c++11 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s +// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -std=c++14 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s #include "Inputs/cuda.h" @@ -14,6 +14,13 @@ struct HostDeviceReturnTy {}; struct TemplateReturnTy {}; +struct CorrectOverloadRetTy{}; +#if __CUDA_ARCH__ +// expected-note@-2 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'InCorrectOverloadRetTy' to 'const CorrectOverloadRetTy &' for 1st argument}} +// expected-note@-3 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'InCorrectOverloadRetTy' to 'CorrectOverloadRetTy &&' for 1st argument}} +#endif +struct InCorrectOverloadRetTy{}; + typedef HostReturnTy (*HostFnPtr)(); typedef DeviceReturnTy (*DeviceFnPtr)(); typedef HostDeviceReturnTy (*HostDeviceFnPtr)(); @@ -463,3 +470,74 @@ void foo() { __test(); } + +// Test resolving implicit host device candidate vs wrong-sided candidate. +// In device compilation, implicit host device caller choose implicit host +// device candidate and wrong-sided candidate with equal preference. +// Resolution result should not change with/without pragma. +namespace ImplicitHostDeviceVsWrongSided { +inline CorrectOverloadRetTy callee(double x); +#pragma clang force_cuda_host_device begin +inline InCorrectOverloadRetTy callee(int x); +inline CorrectOverloadRetTy implicit_hd_caller() { + return callee(1.0); +} +#pragma clang force_cuda_host_device end +} + +// Test resolving implicit host device candidate vs same-sided candidate. +// In host compilation, implicit host device caller choose implicit host +// device candidate and same-sided candidate with equal preference. +// Resolution result should not change with/without pragma. +namespace ImplicitHostDeviceVsSameSide { +inline InCorrectOverloadRetTy callee(int x); +#pragma clang force_cuda_host_device begin +inline CorrectOverloadRetTy callee(double x); +inline CorrectOverloadRetTy implicit_hd_caller() { + return callee(1.0); +} +#pragma clang force_cuda_host_device end +} + +// In the implicit host device function 'caller', the second 'callee' should be +// since it has better match, even though it is an implicit host device function +// whereas the first 'callee' is a host function. A diagnostic will be emitted +// if the first 'callee' is chosen since deduced return type cannot be used +// before it is defined. +namespace ImplicitHostDeviceByConstExpr { +template a b; +auto callee(...); +template constexpr auto callee(d) -> decltype(0); +struct e { + template static auto g(ad, f...) { + return h)...>; + } + struct i { + template static constexpr auto caller(f... k) { + return callee(k...); + } + }; + template static auto h() { + return i::caller; + } +}; +class l { + l() { + e::g([] {}, this); + } +}; +} + +// Test resolving explicit host device candidate vs. wrong-sided candidate. +// Explicit host device caller favors host device candidate against wrong-sided +// candidate. +namespace ExplicitHostDeviceVsWrongSided { +inline CorrectOverloadRetTy callee(double x); +inline __host__ __device__ InCorrectOverloadRetTy callee(int x); +inline __host__ __device__ CorrectOverloadRetTy explicit_hd_caller() { + return callee(1.0); +#if __CUDA_ARCH__ + // expected-error@-2 {{no viable conversion from returned value of type 'InCorrectOverloadRetTy' to function return type 'CorrectOverloadRetTy'}} +#endif +} +}