Index: clang/lib/Sema/SemaOverload.cpp =================================================================== --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -9475,6 +9475,35 @@ else if (!Cand1.Viable) return false; + // If Cand1 can be emitted and Cand2 cannot be emitted in the current context, + // Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2 can be + // emitted, Cand1 is not better than Cand2. This rule should have precedence + // over other rules. + // + // If both Cand1 and Cand2 can be emitted, or neither can be emitted, then + // other rules should be used to determine which is better. + // + // If other rules cannot determine which is better, CUDA preference will be + // used again to determine which is better. + // + // TODO: Currently IdentifyCUDAPreference does not return correct values + // for functions called in global variable initializers due to missing + // correct context about device/host. Therefore we can only enforce this + // rule when there is a caller. We should enforce this rule for functions + // in global variable initializers once proper context is added. + if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { + if (FunctionDecl *Caller = dyn_cast(S.CurContext)) { + auto Cand1Emittable = S.IdentifyCUDAPreference(Caller, Cand1.Function) > + Sema::CFP_WrongSide; + auto Cand2Emittable = S.IdentifyCUDAPreference(Caller, Cand2.Function) > + Sema::CFP_WrongSide; + if (Cand1Emittable && !Cand2Emittable) + return true; + if (!Cand1Emittable && Cand2Emittable) + return false; + } + } + // C++ [over.match.best]p1: // // -- if F is a static member function, ICS1(F) is defined such @@ -9709,12 +9738,6 @@ return Cmp == Comparison::Better; } - if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { - FunctionDecl *Caller = dyn_cast(S.CurContext); - return S.IdentifyCUDAPreference(Caller, Cand1.Function) > - S.IdentifyCUDAPreference(Caller, Cand2.Function); - } - bool HasPS1 = Cand1.Function != nullptr && functionHasPassObjectSizeParams(Cand1.Function); bool HasPS2 = Cand2.Function != nullptr && @@ -9722,7 +9745,19 @@ if (HasPS1 != HasPS2 && HasPS1) return true; - return isBetterMultiversionCandidate(Cand1, Cand2); + if (isBetterMultiversionCandidate(Cand1, Cand2)) + return true; + + // If other rules cannot determine which is better, CUDA preference is used + // to determine which is better. + if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { + if (FunctionDecl *Caller = dyn_cast(S.CurContext)) { + return S.IdentifyCUDAPreference(Caller, Cand1.Function) > + S.IdentifyCUDAPreference(Caller, Cand2.Function); + } + } + + return false; } /// Determine whether two declarations are "equivalent" for the purposes of @@ -9808,33 +9843,6 @@ std::transform(begin(), end(), std::back_inserter(Candidates), [](OverloadCandidate &Cand) { return &Cand; }); - // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but - // are accepted by both clang and NVCC. However, during a particular - // compilation mode only one call variant is viable. We need to - // exclude non-viable overload candidates from consideration based - // only on their host/device attributes. Specifically, if one - // candidate call is WrongSide and the other is SameSide, we ignore - // the WrongSide candidate. - if (S.getLangOpts().CUDA) { - const FunctionDecl *Caller = dyn_cast(S.CurContext); - bool ContainsSameSideCandidate = - llvm::any_of(Candidates, [&](OverloadCandidate *Cand) { - // Check viable function only. - return Cand->Viable && Cand->Function && - S.IdentifyCUDAPreference(Caller, Cand->Function) == - Sema::CFP_SameSide; - }); - if (ContainsSameSideCandidate) { - auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) { - // Check viable function only to avoid unnecessary data copying/moving. - return Cand->Viable && Cand->Function && - S.IdentifyCUDAPreference(Caller, Cand->Function) == - Sema::CFP_WrongSide; - }; - llvm::erase_if(Candidates, IsWrongSideCandidate); - } - } - // Find the best viable function. Best = end(); for (auto *Cand : Candidates) { Index: clang/test/SemaCUDA/function-overload.cu =================================================================== --- clang/test/SemaCUDA/function-overload.cu +++ clang/test/SemaCUDA/function-overload.cu @@ -331,9 +331,6 @@ // If we have a mix of HD and H-only or D-only candidates in the overload set, // normal C++ overload resolution rules apply first. template TemplateReturnTy template_vs_hd_function(T arg) -#ifdef __CUDA_ARCH__ -//expected-note@-2 {{declared here}} -#endif { return TemplateReturnTy(); } @@ -342,11 +339,13 @@ } __host__ __device__ void test_host_device_calls_hd_template() { - HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); - TemplateReturnTy ret2 = template_vs_hd_function(1); #ifdef __CUDA_ARCH__ - // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function' in __host__ __device__ function}} + typedef HostDeviceReturnTy ExpectedReturnTy; +#else + typedef TemplateReturnTy ExpectedReturnTy; #endif + HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); + ExpectedReturnTy ret2 = template_vs_hd_function(1); } __host__ void test_host_calls_hd_template() {