Index: clang/include/clang/Basic/LangOptions.def =================================================================== --- clang/include/clang/Basic/LangOptions.def +++ clang/include/clang/Basic/LangOptions.def @@ -242,6 +242,7 @@ LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP") LANGOPT(GPUMaxThreadsPerBlock, 32, 256, "default max threads per block for kernel launch bounds for HIP") LANGOPT(GPUDeferDiag, 1, 0, "defer host/device related diagnostic messages for CUDA/HIP") +LANGOPT(FixOverloadResolution, 1, 0, "fix host/device related overloading resolution for CUDA/HIP") LANGOPT(SYCL , 1, 0, "SYCL") LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device") Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -712,6 +712,9 @@ defm gpu_defer_diag : OptInFFlag<"gpu-defer-diag", "Defer", "Don't defer", " host/device related diagnostic messages" " for CUDA/HIP">; +defm fix_overload_resolution : OptInFFlag<"fix-overload-resolution", + "Fix", "Don't fix", " host/device related overloading resolution" + " for CUDA/HIP">; def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">, Flags<[CC1Option]>, HelpText<"Default max threads per block for kernel launch bounds for HIP">; Index: clang/include/clang/Sema/Overload.h =================================================================== --- clang/include/clang/Sema/Overload.h +++ clang/include/clang/Sema/Overload.h @@ -1051,6 +1051,9 @@ void destroyCandidates(); + /// Whether diagnostics should be deferred. + bool shouldDeferDiags(Sema &S, ArrayRef Args, SourceLocation OpLoc); + public: OverloadCandidateSet(SourceLocation Loc, CandidateSetKind CSK, OperatorRewriteInfo RewriteInfo = {}) Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -5593,6 +5593,11 @@ if (Args.hasFlag(options::OPT_fgpu_defer_diag, options::OPT_fno_gpu_defer_diag, false)) CmdArgs.push_back("-fgpu-defer-diag"); + if (Args.hasFlag(options::OPT_ffix_overload_resolution, + options::OPT_fno_fix_overload_resolution, false)) { + CmdArgs.push_back("-ffix-overload-resolution"); + CmdArgs.push_back("-fgpu-defer-diag"); + } } if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) { Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -2684,6 +2684,9 @@ if (Args.hasArg(OPT_fno_cuda_host_device_constexpr)) Opts.CUDAHostDeviceConstexpr = 0; + if (Args.hasArg(OPT_ffix_overload_resolution)) + Opts.FixOverloadResolution = 1; + if (Args.hasArg(OPT_fgpu_defer_diag)) Opts.GPUDeferDiag = 1; Index: clang/lib/Sema/SemaOverload.cpp =================================================================== --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -9616,6 +9616,75 @@ else if (!Cand1.Viable) return false; + // [CUDA] A function with 'never' preference is marked not viable, therefore + // is never shown up here. The worst preference shown up here is 'wrong side', + // e.g. a host function called by a device host function in device + // compilation. This is valid AST as long as the host device function is not + // emitted, e.g. it is an inline function which is called only by a host + // function. A deferred diagnostic will be triggered if it is emitted. + // However a wrong-sided function is still a viable candidate here. + // + // If Cand1 can be emitted and Cand2 cannot be emitted in the current + // context, Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2 + // can be emitted, Cand1 is not better than Cand2. This rule should have + // precedence over other rules. + // + // If both Cand1 and Cand2 can be emitted, or neither can be emitted, then + // other rules should be used to determine which is better. This is because + // host/device based overloading resolution is mostly for determining + // viability of a function. If two functions are both viable, other factors + // should take precedence in preference, e.g. the standard-defined preferences + // like argument conversion ranks or enable_if partial-ordering. The + // preference for pass-object-size parameters is probably most similar to a + // type-based-overloading decision and so should take priority. + // + // If other rules cannot determine which is better, CUDA preference will be + // used again to determine which is better. + // + // TODO: Currently IdentifyCUDAPreference does not return correct values + // for functions called in global variable initializers due to missing + // correct context about device/host. Therefore we can only enforce this + // rule when there is a caller. We should enforce this rule for functions + // in global variable initializers once proper context is added. + // + // TODO: We can only enable the hostness based overloading resolution when + // -ffix-overload-resolution is on since this requires deferring overloading + // resolution diagnostics. + if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function && + S.getLangOpts().FixOverloadResolution) { + if (FunctionDecl *Caller = dyn_cast(S.CurContext)) { + bool IsCallerImplicitHD = Sema::isCUDAImplicitHostDeviceFunction(Caller); + bool IsCand1ImplicitHD = + Sema::isCUDAImplicitHostDeviceFunction(Cand1.Function); + bool IsCand2ImplicitHD = + Sema::isCUDAImplicitHostDeviceFunction(Cand2.Function); + auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function); + auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function); + assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never); + // The implicit HD function may be a function in a system header which + // is forced by pragma. In device compilation, if we prefer HD candidates + // over wrong-sided candidates, overloading resolution may change, which + // may result in non-deferrable diagnostics. As a workaround, we let + // implicit HD candidates take equal preference as wrong-sided candidates. + // This will preserve the overloading resolution. + // TODO: We still need special handling of implicit HD functions since + // they may incur other diagnostics to be deferred. We should make all + // host/device related diagnostics deferrable and remove special handling + // of implicit HD functions. + auto EmitThreshold = + (S.getLangOpts().CUDAIsDevice && IsCallerImplicitHD && + (IsCand1ImplicitHD || IsCand2ImplicitHD)) + ? Sema::CFP_Never + : Sema::CFP_WrongSide; + auto Cand1Emittable = P1 > EmitThreshold; + auto Cand2Emittable = P2 > EmitThreshold; + if (Cand1Emittable && !Cand2Emittable) + return true; + if (!Cand1Emittable && Cand2Emittable) + return false; + } + } + // C++ [over.match.best]p1: // // -- if F is a static member function, ICS1(F) is defined such @@ -9850,12 +9919,6 @@ return Cmp == Comparison::Better; } - if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { - FunctionDecl *Caller = dyn_cast(S.CurContext); - return S.IdentifyCUDAPreference(Caller, Cand1.Function) > - S.IdentifyCUDAPreference(Caller, Cand2.Function); - } - bool HasPS1 = Cand1.Function != nullptr && functionHasPassObjectSizeParams(Cand1.Function); bool HasPS2 = Cand2.Function != nullptr && @@ -9863,8 +9926,21 @@ if (HasPS1 != HasPS2 && HasPS1) return true; - Comparison MV = isBetterMultiversionCandidate(Cand1, Cand2); - return MV == Comparison::Better; + auto MV = isBetterMultiversionCandidate(Cand1, Cand2); + if (MV == Comparison::Better) + return true; + if (MV == Comparison::Worse) + return false; + + // If other rules cannot determine which is better, CUDA preference is used + // to determine which is better. + if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { + FunctionDecl *Caller = dyn_cast(S.CurContext); + return S.IdentifyCUDAPreference(Caller, Cand1.Function) > + S.IdentifyCUDAPreference(Caller, Cand2.Function); + } + + return false; } /// Determine whether two declarations are "equivalent" for the purposes of @@ -9957,7 +10033,10 @@ // only on their host/device attributes. Specifically, if one // candidate call is WrongSide and the other is SameSide, we ignore // the WrongSide candidate. - if (S.getLangOpts().CUDA) { + // We only need to remove wrong-sided candidates here if + // -ffix-overload-resolution is off. When -ffix-overload-resolution is on, + // all candidates are compared uniformly in isBetterOverloadCandidate. + if (S.getLangOpts().CUDA && !S.getLangOpts().FixOverloadResolution) { const FunctionDecl *Caller = dyn_cast(S.CurContext); bool ContainsSameSideCandidate = llvm::any_of(Candidates, [&](OverloadCandidate *Cand) { @@ -11620,26 +11699,34 @@ return Cands; } -/// When overload resolution fails, prints diagnostic messages containing the -/// candidates in the candidate set. -void OverloadCandidateSet::NoteCandidates(PartialDiagnosticAt PD, - Sema &S, OverloadCandidateDisplayKind OCD, ArrayRef Args, - StringRef Opc, SourceLocation OpLoc, - llvm::function_ref Filter) { - +bool OverloadCandidateSet::shouldDeferDiags(Sema &S, ArrayRef Args, + SourceLocation OpLoc) { bool DeferHint = false; if (S.getLangOpts().CUDA && S.getLangOpts().GPUDeferDiag) { - // Defer diagnostic for CUDA/HIP if there are wrong-sided candidates. + // Defer diagnostic for CUDA/HIP if there are wrong-sided candidates or + // host device candidates. auto WrongSidedCands = CompleteCandidates(S, OCD_AllCandidates, Args, OpLoc, [](auto &Cand) { - return Cand.Viable == false && - Cand.FailureKind == ovl_fail_bad_target; + return (Cand.Viable == false && + Cand.FailureKind == ovl_fail_bad_target) || + (Cand.Function->template hasAttr() && + Cand.Function->template hasAttr()); }); DeferHint = WrongSidedCands.size(); } + return DeferHint; +} + +/// When overload resolution fails, prints diagnostic messages containing the +/// candidates in the candidate set. +void OverloadCandidateSet::NoteCandidates( + PartialDiagnosticAt PD, Sema &S, OverloadCandidateDisplayKind OCD, + ArrayRef Args, StringRef Opc, SourceLocation OpLoc, + llvm::function_ref Filter) { + auto Cands = CompleteCandidates(S, OCD, Args, OpLoc, Filter); - S.Diag(PD.first, PD.second, DeferHint); + S.Diag(PD.first, PD.second, shouldDeferDiags(S, Args, OpLoc)); NoteCandidates(S, Args, Cands, Opc, OpLoc); @@ -11691,7 +11778,9 @@ } if (I != E) - S.Diag(OpLoc, diag::note_ovl_too_many_candidates) << int(E - I); + S.Diag(OpLoc, diag::note_ovl_too_many_candidates, + shouldDeferDiags(S, Args, OpLoc)) + << int(E - I); } static SourceLocation Index: clang/test/Driver/hip-options.hip =================================================================== --- clang/test/Driver/hip-options.hip +++ clang/test/Driver/hip-options.hip @@ -35,3 +35,8 @@ // RUN: %clang -### -nogpuinc -nogpulib -munsafe-fp-atomics \ // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s // UNSAFE-FP-ATOMICS: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-munsafe-fp-atomics" + +// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib -ffix-overload-resolution \ +// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=FIX-OVERLOAD %s +// FIX-OVERLOAD: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-ffix-overload-resolution" "-fgpu-defer-diag" +// FIX-OVERLOAD: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-ffix-overload-resolution" "-fgpu-defer-diag" Index: clang/test/SemaCUDA/deferred-oeverload.cu =================================================================== --- clang/test/SemaCUDA/deferred-oeverload.cu +++ clang/test/SemaCUDA/deferred-oeverload.cu @@ -54,7 +54,7 @@ // This fails to substitue for A but no diagnostic // should be emitted. template -__host__ __device__ void sfinae(T t) { // com-note {{candidate template ignored: substitution failure [with T = B]}} +__host__ __device__ void sfinae(T t) { // host-note {{candidate template ignored: substitution failure [with T = B]}} t.x = 1; } @@ -64,13 +64,13 @@ // file scope. template -__host__ __device__ void sfinae(T t) { // com-note {{candidate template ignored: substitution failure [with T = B]}} +__host__ __device__ void sfinae(T t) { // host-note {{candidate template ignored: substitution failure [with T = B]}} t.x = 1; } void test_sfinae() { sfinae(A()); - sfinae(B()); // com-error{{no matching function for call to 'sfinae'}} + sfinae(B()); // host-error{{no matching function for call to 'sfinae'}} } // Make sure throw is diagnosed in OpenMP parallel region in host function. Index: clang/test/SemaCUDA/function-overload.cu =================================================================== --- clang/test/SemaCUDA/function-overload.cu +++ clang/test/SemaCUDA/function-overload.cu @@ -1,8 +1,16 @@ // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify=host,expected %s -// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify=dev,expected %s +// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -verify=host,hostdefer,devdefer,expected %s +// RUN: %clang_cc1 -std=c++14 -triple nvptx64-nvidia-cuda -fsyntax-only \ +// RUN: -fcuda-is-device -verify=dev,devnodeferonly,hostdefer,devdefer,expected %s +// RUN: %clang_cc1 -ffix-overload-resolution -fgpu-defer-diag -DDEFER=1 \ +// RUN: -std=c++14 -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -verify=host,hostdefer,expected %s +// RUN: %clang_cc1 -ffix-overload-resolution -fgpu-defer-diag -DDEFER=1 \ +// RUN: -std=c++14 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device \ +// RUN: -verify=dev,devdeferonly,devdefer,expected %s #include "Inputs/cuda.h" @@ -76,37 +84,37 @@ // Helper functions to verify calling restrictions. __device__ DeviceReturnTy d() { return DeviceReturnTy(); } // host-note@-1 1+ {{'d' declared here}} -// expected-note@-2 1+ {{candidate function not viable: call to __device__ function from __host__ function}} +// hostdefer-note@-2 1+ {{candidate function not viable: call to __device__ function from __host__ function}} // expected-note@-3 0+ {{candidate function not viable: call to __device__ function from __host__ __device__ function}} __host__ HostReturnTy h() { return HostReturnTy(); } // dev-note@-1 1+ {{'h' declared here}} -// expected-note@-2 1+ {{candidate function not viable: call to __host__ function from __device__ function}} +// devdefer-note@-2 1+ {{candidate function not viable: call to __host__ function from __device__ function}} // expected-note@-3 0+ {{candidate function not viable: call to __host__ function from __host__ __device__ function}} -// expected-note@-4 1+ {{candidate function not viable: call to __host__ function from __global__ function}} +// devdefer-note@-4 1+ {{candidate function not viable: call to __host__ function from __global__ function}} __global__ void g() {} // dev-note@-1 1+ {{'g' declared here}} -// expected-note@-2 1+ {{candidate function not viable: call to __global__ function from __device__ function}} +// devdefer-note@-2 1+ {{candidate function not viable: call to __global__ function from __device__ function}} // expected-note@-3 0+ {{candidate function not viable: call to __global__ function from __host__ __device__ function}} -// expected-note@-4 1+ {{candidate function not viable: call to __global__ function from __global__ function}} +// devdefer-note@-4 1+ {{candidate function not viable: call to __global__ function from __global__ function}} extern "C" __device__ DeviceReturnTy cd() { return DeviceReturnTy(); } // host-note@-1 1+ {{'cd' declared here}} -// expected-note@-2 1+ {{candidate function not viable: call to __device__ function from __host__ function}} +// hostdefer-note@-2 1+ {{candidate function not viable: call to __device__ function from __host__ function}} // expected-note@-3 0+ {{candidate function not viable: call to __device__ function from __host__ __device__ function}} extern "C" __host__ HostReturnTy ch() { return HostReturnTy(); } // dev-note@-1 1+ {{'ch' declared here}} -// expected-note@-2 1+ {{candidate function not viable: call to __host__ function from __device__ function}} +// devdefer-note@-2 1+ {{candidate function not viable: call to __host__ function from __device__ function}} // expected-note@-3 0+ {{candidate function not viable: call to __host__ function from __host__ __device__ function}} -// expected-note@-4 1+ {{candidate function not viable: call to __host__ function from __global__ function}} +// devdefer-note@-4 1+ {{candidate function not viable: call to __host__ function from __global__ function}} __host__ void hostf() { DeviceFnPtr fp_d = d; // host-error {{reference to __device__ function 'd' in __host__ function}} - DeviceReturnTy ret_d = d(); // expected-error {{no matching function for call to 'd'}} + DeviceReturnTy ret_d = d(); // hostdefer-error {{no matching function for call to 'd'}} DeviceFnPtr fp_cd = cd; // host-error {{reference to __device__ function 'cd' in __host__ function}} - DeviceReturnTy ret_cd = cd(); // expected-error {{no matching function for call to 'cd'}} + DeviceReturnTy ret_cd = cd(); // hostdefer-error {{no matching function for call to 'cd'}} HostFnPtr fp_h = h; HostReturnTy ret_h = h(); @@ -130,9 +138,9 @@ DeviceReturnTy ret_cd = cd(); HostFnPtr fp_h = h; // dev-error {{reference to __host__ function 'h' in __device__ function}} - HostReturnTy ret_h = h(); // expected-error {{no matching function for call to 'h'}} + HostReturnTy ret_h = h(); // devdefer-error {{no matching function for call to 'h'}} HostFnPtr fp_ch = ch; // dev-error {{reference to __host__ function 'ch' in __device__ function}} - HostReturnTy ret_ch = ch(); // expected-error {{no matching function for call to 'ch'}} + HostReturnTy ret_ch = ch(); // devdefer-error {{no matching function for call to 'ch'}} DeviceFnPtr fp_dh = dh; DeviceReturnTy ret_dh = dh(); @@ -140,7 +148,7 @@ DeviceReturnTy ret_cdh = cdh(); GlobalFnPtr fp_g = g; // dev-error {{reference to __global__ function 'g' in __device__ function}} - g(); // expected-error {{no matching function for call to 'g'}} + g(); // devdefer-error {{no matching function for call to 'g'}} g<<<0,0>>>(); // dev-error {{reference to __global__ function 'g' in __device__ function}} } @@ -151,9 +159,9 @@ DeviceReturnTy ret_cd = cd(); HostFnPtr fp_h = h; // dev-error {{reference to __host__ function 'h' in __global__ function}} - HostReturnTy ret_h = h(); // expected-error {{no matching function for call to 'h'}} + HostReturnTy ret_h = h(); // devdefer-error {{no matching function for call to 'h'}} HostFnPtr fp_ch = ch; // dev-error {{reference to __host__ function 'ch' in __global__ function}} - HostReturnTy ret_ch = ch(); // expected-error {{no matching function for call to 'ch'}} + HostReturnTy ret_ch = ch(); // devdefer-error {{no matching function for call to 'ch'}} DeviceFnPtr fp_dh = dh; DeviceReturnTy ret_dh = dh(); @@ -161,7 +169,7 @@ DeviceReturnTy ret_cdh = cdh(); GlobalFnPtr fp_g = g; // dev-error {{reference to __global__ function 'g' in __global__ function}} - g(); // expected-error {{no matching function for call to 'g'}} + g(); // devdefer-error {{no matching function for call to 'g'}} g<<<0,0>>>(); // dev-error {{reference to __global__ function 'g' in __global__ function}} } @@ -184,7 +192,7 @@ #if defined(__CUDA_ARCH__) // expected-error@-5 {{reference to __host__ function 'h' in __host__ __device__ function}} // expected-error@-5 {{reference to __host__ function 'h' in __host__ __device__ function}} - // expected-error@-5 {{reference to __host__ function 'ch' in __host__ __device__ function}} + // devdefer-error@-5 {{reference to __host__ function 'ch' in __host__ __device__ function}} // expected-error@-5 {{reference to __host__ function 'ch' in __host__ __device__ function}} #endif @@ -331,9 +339,7 @@ // If we have a mix of HD and H-only or D-only candidates in the overload set, // normal C++ overload resolution rules apply first. template TemplateReturnTy template_vs_hd_function(T arg) -#ifdef __CUDA_ARCH__ -//expected-note@-2 {{declared here}} -#endif +// devnodeferonly-note@-1{{'template_vs_hd_function' declared here}} { return TemplateReturnTy(); } @@ -342,11 +348,14 @@ } __host__ __device__ void test_host_device_calls_hd_template() { - HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); - TemplateReturnTy ret2 = template_vs_hd_function(1); -#ifdef __CUDA_ARCH__ - // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function' in __host__ __device__ function}} +#if __CUDA_ARCH__ && DEFER + typedef HostDeviceReturnTy ExpectedReturnTy; +#else + typedef TemplateReturnTy ExpectedReturnTy; #endif + HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); + ExpectedReturnTy ret2 = template_vs_hd_function(1); + // devnodeferonly-error@-1{{reference to __host__ function 'template_vs_hd_function' in __host__ __device__ function}} } __host__ void test_host_calls_hd_template() { @@ -367,14 +376,14 @@ __device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); } __device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); } #ifndef __CUDA_ARCH__ - // expected-note@-3 {{'device_only_function' declared here}} - // expected-note@-3 {{'device_only_function' declared here}} + // expected-note@-3 2{{'device_only_function' declared here}} + // expected-note@-3 2{{'device_only_function' declared here}} #endif __host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); } __host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); } #ifdef __CUDA_ARCH__ - // expected-note@-3 {{'host_only_function' declared here}} - // expected-note@-3 {{'host_only_function' declared here}} + // expected-note@-3 2{{'host_only_function' declared here}} + // expected-note@-3 2{{'host_only_function' declared here}} #endif __host__ __device__ void test_host_device_single_side_overloading() { @@ -392,6 +401,37 @@ #endif } +// wrong-sided overloading should not cause diagnostic unless it is emitted. +// This inline function is not emitted. +inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_no_diag() { + DeviceReturnTy ret1 = device_only_function(1); + DeviceReturnTy2 ret2 = device_only_function(1.0f); + HostReturnTy ret3 = host_only_function(1); + HostReturnTy2 ret4 = host_only_function(1.0f); +} + +// wrong-sided overloading should cause diagnostic if it is emitted. +// This inline function is emitted since it is called by an emitted function. +inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_diag() { + DeviceReturnTy ret1 = device_only_function(1); + DeviceReturnTy2 ret2 = device_only_function(1.0f); +#ifndef __CUDA_ARCH__ + // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}} + // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}} +#endif + HostReturnTy ret3 = host_only_function(1); + HostReturnTy2 ret4 = host_only_function(1.0f); +#ifdef __CUDA_ARCH__ + // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}} + // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}} +#endif +} + +__host__ __device__ void test_host_device_wrong_side_overloading_inline_diag_caller() { + test_host_device_wrong_side_overloading_inline_diag(); + // expected-note@-1 {{called by 'test_host_device_wrong_side_overloading_inline_diag_caller'}} +} + // Verify that we allow overloading function templates. template __host__ T template_overload(const T &a) { return a; }; template __device__ T template_overload(const T &a) { return a; }; @@ -419,3 +459,255 @@ int test_constexpr_overload(C2 &x, C2 &y) { return constexpr_overload(x, y); } + +// Verify no ambiguity for new operator. +void *a = new int; +__device__ void *b = new int; +// expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} + +// Verify no ambiguity for new operator. +template _Tp&& f(); +template()))> +void __test(); + +void foo() { + __test(); +} + +// Test resolving implicit host device candidate vs wrong-sided candidate. +// In device compilation, implicit host device caller choose implicit host +// device candidate and wrong-sided candidate with equal preference. +// Resolution result should not change with/without pragma. +namespace ImplicitHostDeviceVsWrongSided { +HostReturnTy callee(double x); +#pragma clang force_cuda_host_device begin +HostDeviceReturnTy callee(int x); +inline HostReturnTy implicit_hd_caller() { + return callee(1.0); +} +#pragma clang force_cuda_host_device end +} + +// Test resolving implicit host device candidate vs same-sided candidate. +// In host compilation, implicit host device caller choose implicit host +// device candidate and same-sided candidate with equal preference. +// Resolution result should not change with/without pragma. +namespace ImplicitHostDeviceVsSameSide { +HostReturnTy callee(int x); +#pragma clang force_cuda_host_device begin +HostDeviceReturnTy callee(double x); +inline HostDeviceReturnTy implicit_hd_caller() { + return callee(1.0); +} +#pragma clang force_cuda_host_device end +} + +// Test resolving explicit host device candidate vs. wrong-sided candidate. +// When -fgpu-defer-diag is off, wrong-sided candidate is not excluded, therefore +// the first callee is chosen. +// When -fgpu-defer-diag is on, wrong-sided candidate is excluded, therefore +// the second callee is chosen. +namespace ExplicitHostDeviceVsWrongSided { +HostReturnTy callee(double x); +__host__ __device__ HostDeviceReturnTy callee(int x); +#if __CUDA_ARCH__ && DEFER +typedef HostDeviceReturnTy ExpectedRetTy; +#else +typedef HostReturnTy ExpectedRetTy; +#endif +inline __host__ __device__ ExpectedRetTy explicit_hd_caller() { + return callee(1.0); +} +} + +// In the implicit host device function 'caller', the second 'callee' should be +// chosen since it has better match, even though it is an implicit host device +// function whereas the first 'callee' is a host function. A diagnostic will be +// emitted if the first 'callee' is chosen since deduced return type cannot be +// used before it is defined. +namespace ImplicitHostDeviceByConstExpr { +template a b; +auto callee(...); +template constexpr auto callee(d) -> decltype(0); +struct e { + template static auto g(ad, f...) { + return h)...>; + } + struct i { + template static constexpr auto caller(f... k) { + return callee(k...); + } + }; + template static auto h() { + return i::caller; + } +}; +class l { + l() { + e::g([] {}, this); + } +}; +} + +// Implicit HD candidate competes with device candidate. +// a and b have implicit HD copy ctor. In copy ctor of b, ctor of a is resolved. +// copy ctor of a should win over a(short), otherwise there will be ambiguity +// due to conversion operator. +namespace TestImplicitHDWithD { + struct a { + __device__ a(short); + __device__ operator unsigned() const; + __device__ operator int() const; + }; + struct b { + a d; + }; + void f(b g) { b e = g; } +} + +// Implicit HD candidate competes with host candidate. +// a and b have implicit HD copy ctor. In copy ctor of b, ctor of a is resolved. +// copy ctor of a should win over a(short), otherwise there will be ambiguity +// due to conversion operator. +namespace TestImplicitHDWithH { + struct a { + a(short); + __device__ operator unsigned() const; + __device__ operator int() const; + }; + struct b { + a d; + }; + void f(b g) { b e = g; } +} + +// Implicit HD candidate competes with HD candidate. +// a and b have implicit HD copy ctor. In copy ctor of b, ctor of a is resolved. +// copy ctor of a should win over a(short), otherwise there will be ambiguity +// due to conversion operator. +namespace TestImplicitHDWithHD { + struct a { + __host__ __device__ a(short); + __device__ operator unsigned() const; + __device__ operator int() const; + }; + struct b { + a d; + }; + void f(b g) { b e = g; } +} + +// HD candidate competes with H candidate. +// HD has type mismatch whereas H has type match. +// In device compilation, H wins when -fgpu-defer-diag is off and HD wins +// when -fgpu-defer-diags is on. In both cases the diagnostic should be +// deferred. +namespace TestDeferNoMatchingFuncNotEmitted { + template struct a {}; + namespace b { + struct c : a {}; + template void ag(d); + } // namespace b + template + __host__ __device__ void ag(a) { + ae e; + ag(e); + } + void f() { (void)ag; } +} + +namespace TestDeferNoMatchingFuncEmitted { + template struct a {}; + namespace b { + struct c : a {}; + template void ag(d); + // devnodeferonly-note@-1{{'ag' declared here}} + } // namespace b + template + __host__ __device__ void ag(a) { + ae e; + ag(e); + // devnodeferonly-error@-1{{reference to __host__ function 'ag' in __host__ __device__ function}} + // devdeferonly-error@-2{{no matching function for call to 'ag'}} + // devdeferonly-note@-3{{called by 'ag'}} + } + __host__ __device__ void f() { (void)ag; } + // devnodeferonly-note@-1{{called by 'f'}} + // devdeferonly-note@-2{{called by 'f'}} +} + +// Two HD candidates compete with H candidate. +// HDs have type mismatch whereas H has type match. +// In device compilation, H wins when -fgpu-defer-diag is off and two HD win +// when -fgpu-defer-diags is on. In both cases the diagnostic should be +// deferred. +namespace TestDeferAmbiguityNotEmitted { + template struct a {}; + namespace b { + struct c : a {}; + template void ag(d, int); + } // namespace b + template + __host__ __device__ void ag(a, float) { + ae e; + ag(e, 1); + } + template + __host__ __device__ void ag(a, double) { + } + void f() { + b::c x; + ag(x, 1); + } +} + +namespace TestDeferAmbiguityEmitted { + template struct a {}; + namespace b { + struct c : a {}; + template void ag(d, int); + // devnodeferonly-note@-1{{'ag' declared here}} + } // namespace b + template + __host__ __device__ void ag(a, float) { + // devdeferonly-note@-1{{candidate function [with ae = int]}} + ae e; + ag(e, 1); + } + template + __host__ __device__ void ag(a, double) { + // devdeferonly-note@-1{{candidate function [with ae = int]}} + } + __host__ __device__ void f() { + b::c x; + ag(x, 1); + // devnodeferonly-error@-1{{reference to __host__ function 'ag' in __host__ __device__ function}} + // devdeferonly-error@-2{{call to 'ag' is ambiguous}} + } +} + +// Implicit HD functions compute with H function and D function. +// In host compilation, foo(0.0, 2) should resolve to X::foo. +// In device compilation, foo(0.0, 2) should resolve to foo(double, int). +// In either case there should be no ambiguity. +namespace TestImplicitHDWithHAndD { + namespace X { + inline double foo(double, double) { return 0;} + inline constexpr float foo(float, float) { return 1;} + inline constexpr long double foo(long double, long double) { return 2;} + template inline constexpr double foo(_Tp, _Up) { return 3;} + }; + using X::foo; + inline __device__ double foo(double, double) { return 4;} + inline __device__ float foo(float, int) { return 5;} + inline __device__ float foo(int, int) { return 6;} + inline __device__ double foo(double, int) { return 7;} + inline __device__ float foo(float, float) { return 9;} + template inline __device__ double foo(_Tp, _Up) { return 10;} + + int g() { + return [](){ + return foo(0.0, 2); + }(); + } +}