diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -123,7 +123,8 @@
     return CFT_Device;
   } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
     return CFT_Host;
-  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
+  } else if ((D->isImplicit() || !D->isUserProvided()) &&
+             !IgnoreImplicitHDAttr) {
     // Some implicit declarations (like intrinsic functions) are not marked.
     // Set the most lenient target on them for maximal flexibility.
     return CFT_HostDevice;
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1527,9 +1527,24 @@
 bool Sema::isUsualDeallocationFunction(const CXXMethodDecl *Method) {
   // [CUDA] Ignore this function, if we can't call it.
   const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
-  if (getLangOpts().CUDA &&
-      IdentifyCUDAPreference(Caller, Method) <= CFP_WrongSide)
-    return false;
+  if (getLangOpts().CUDA) {
+    auto CallPreference = IdentifyCUDAPreference(Caller, Method);
+    // If it's not callable at all, it's not the right function.
+    if (CallPreference < CFP_WrongSide)
+      return false;
+    if (CallPreference == CFP_WrongSide) {
+      // Maybe. We have to check if there are better alternatives.
+      DeclContext::lookup_result R =
+          Method->getDeclContext()->lookup(Method->getDeclName());
+      for (const auto *D : R) {
+        if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
+          if (IdentifyCUDAPreference(Caller, FD) > CFP_WrongSide)
+            return false;
+        }
+      }
+      // We've found no better variants.
+    }
+  }
 
   SmallVector<const FunctionDecl*, 4> PreventedBy;
   bool Result = Method->isUsualDeallocationFunction(PreventedBy);
diff --git a/clang/test/CodeGenCUDA/usual-deallocators.cu b/clang/test/CodeGenCUDA/usual-deallocators.cu
--- a/clang/test/CodeGenCUDA/usual-deallocators.cu
+++ b/clang/test/CodeGenCUDA/usual-deallocators.cu
@@ -12,6 +12,19 @@
 extern "C" __device__ void dev_fn();
 extern "C" __host__ __device__ void hd_fn();
 
+// Destructors are handled a bit differently, compared to regular functions.
+// Make sure we do trigger kernel generation on the GPU side even if it's only
+// referenced by the destructor.
+template <typename T> __global__ void f(T) {}
+template <typename T> struct A {
+  ~A() { f<<<1, 1>>>(T()); }
+};
+
+// HOST-LABEL: @a
+A<int> a;
+// HOST-LABEL: define linkonce_odr void @_ZN1AIiED1Ev
+// search further down for the device-side checks for @_Z1fIiEvT_
+
 struct H1D1 {
   __host__ void operator delete(void *) { host_fn(); };
   __device__ void operator delete(void *) { dev_fn(); };
@@ -95,6 +108,9 @@
   test_hd<H1H2D1D2>(t);
 }
 
+// Make sure that we've generated the kernel used by A::~A.
+// DEVICE-LABEL: define dso_local void @_Z1fIiEvT_
+
 // Make sure we've picked deallocator for the correct side of compilation.
 
 // COMMON-LABEL: define linkonce_odr void @_ZN4H1D1dlEPv(i8* %0)
@@ -131,3 +147,5 @@
 // COMMON-LABEL: define linkonce_odr void @_ZN8H1H2D1D2dlEPv(i8* %0)
 // DEVICE: call void @dev_fn()
 // HOST: call void @host_fn()
+
+// DEVICE: !0 = !{void (i32)* @_Z1fIiEvT_, !"kernel", i32 1}
diff --git a/clang/test/SemaCUDA/usual-deallocators.cu b/clang/test/SemaCUDA/usual-deallocators.cu
--- a/clang/test/SemaCUDA/usual-deallocators.cu
+++ b/clang/test/SemaCUDA/usual-deallocators.cu
@@ -93,3 +93,12 @@
   test_hd<H1H2D2>(t);
   test_hd<H1H2D1D2>(t);
 }
+
+// This should produce no errors. Defaulted destructor should be treated as HD,
+// which allows referencing host-only `operator delete` with a deferred
+// diagnostic that would fire if we ever attempt to codegen it on device.
+struct H {
+  virtual ~H() = default;
+  static void operator delete(void *) {}
+};
+H h;