Index: clang/lib/Sema/SemaOverload.cpp
===================================================================
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -12697,6 +12697,20 @@
   DeclAccessPair DAP;
   SmallVector<FunctionDecl *, 2> AmbiguousDecls;
 
+  auto CheckCUDAPreference = [&](FunctionDecl *FD1,
+                                 FunctionDecl *FD2) -> std::optional<bool> {
+    FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+    int Preference1 = IdentifyCUDAPreference(Caller, FD1);
+    int Preference2 = IdentifyCUDAPreference(Caller, FD2);
+    if (Preference1 > Preference2) {
+      return true;
+    } else if (Preference1 < Preference2) {
+      return false;
+    } else {
+      return std::nullopt;
+    }
+  };
+
   auto CheckMoreConstrained = [&](FunctionDecl *FD1,
                                   FunctionDecl *FD2) -> std::optional<bool> {
     if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction())
@@ -12727,9 +12741,34 @@
     if (!checkAddressOfFunctionIsAvailable(FD))
       continue;
 
+    // If we found a better result, update Result.
+    auto FoundBetter = [&]() {
+      IsResultAmbiguous = false;
+      DAP = I.getPair();
+      Result = FD;
+    };
+
     // We have more than one result - see if it is more constrained than the
     // previous one.
     if (Result) {
+      // Check CUDA preference first. If the candidates have different CUDA
+      // preference, choose the one with higher CUDA preference. Otherwise,
+      // choose the one with more constraints.
+      if (getLangOpts().CUDA) {
+        std::optional<bool> MorePreferableByCUDA =
+            CheckCUDAPreference(FD, Result);
+        // If FD has a different CUDA preference than Result.
+        if (MorePreferableByCUDA) {
+          // FD is less preferable than Result.
+          if (!*MorePreferableByCUDA)
+            continue;
+          // FD is more preferable than Result.
+          FoundBetter();
+          continue;
+        }
+      }
+      // FD has the same CUDA preference as Result. Continue checking
+      // constraints.
       std::optional<bool> MoreConstrainedThanPrevious =
           CheckMoreConstrained(FD, Result);
       if (!MoreConstrainedThanPrevious) {
@@ -12741,9 +12780,7 @@
         continue;
       // FD is more constrained - replace Result with it.
     }
-    IsResultAmbiguous = false;
-    DAP = I.getPair();
-    Result = FD;
+    FoundBetter();
   }
 
   if (IsResultAmbiguous)
@@ -12753,9 +12790,15 @@
     SmallVector<const Expr *, 1> ResultAC;
     // We skipped over some ambiguous declarations which might be ambiguous with
     // the selected result.
-    for (FunctionDecl *Skipped : AmbiguousDecls)
+    for (FunctionDecl *Skipped : AmbiguousDecls) {
+      // If the skipped candidate has a different CUDA preference than the
+      // result, there is no ambiguity. Otherwise, check whether they have
+      // different constraints.
+      if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result))
+        continue;
       if (!CheckMoreConstrained(Skipped, Result))
         return nullptr;
+    }
     Pair = DAP;
   }
   return Result;
Index: clang/test/SemaCUDA/template-arg-deduction.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/template-arg-deduction.cu
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
+
+// expected-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+void foo();
+__device__ void foo();
+
+template <class F>
+void host_temp(F f);
+
+template <class F>
+__device__ void device_temp(F f);
+
+void host_caller() {
+  host_temp(foo);
+}
+
+__global__ void kernel_caller() {
+  device_temp(foo);
+}
+
+__device__ void device_caller() {
+  device_temp(foo);
+}