diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11388,9 +11388,14 @@ /// /// Use this rather than examining the function's attributes yourself -- you /// will get it wrong. Returns CFT_Host if D is null. + CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs); CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D, bool IgnoreImplicitHDAttr = false); - CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs); + CUDAFunctionTarget IdentifyCUDATarget(const VarDecl *D, + bool IgnoreImplicitHDAttr = false); + // This routine is the top level dispatcher to more specific variants above. + CUDAFunctionTarget IdentifyCUDATarget(const Decl *D, + bool IgnoreImplicitHDAttr = false); /// Gets the CUDA target for the current context. CUDAFunctionTarget CurrentCUDATarget() { @@ -11410,24 +11415,53 @@ CFP_Native, // host-to-host or device-to-device calls. }; - /// Identifies relative preference of a given Caller/Callee + /// Identifies relative preference of a given callee and that call context /// combination, based on their host/device attributes. - /// \param Caller function which needs address of \p Callee. - /// nullptr in case of global context. - /// \param Callee target function + /// \param CallContextDecl The context decl which needs address of \p Callee. + /// Null in case of the global context. + /// \param Callee Target function. /// /// \returns preference value for particular Caller/Callee combination. - CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller, + CUDAFunctionPreference IdentifyCUDAPreference(const Decl *CallContextDecl, const FunctionDecl *Callee); + SmallVector<const Decl *, 4> CUDANonLocalVariableStack; + + void pushCUDANonLocalVariable(const Decl *D); + void popCUDANonLocalVariable(const Decl *D); + + const Decl *getCUDACurrentNonLocalVariable() const { + return CUDANonLocalVariableStack.empty() ? 
nullptr
 + : CUDANonLocalVariableStack.back(); + } + + const Decl *getCUDAContextDecl() const { + // Skip where no real call could be performed. + // TODO: There are cases where proper checking is required, such as the + // default member initializer. + if (!CurContext->isFileContext() && !CurContext->isFunctionOrMethod()) + return nullptr; + const Decl *ContextDecl = dyn_cast<FunctionDecl>(CurContext); + if (!ContextDecl) + ContextDecl = getCUDACurrentNonLocalVariable(); + return ContextDecl; + } + /// Determines whether Caller may invoke Callee, based on their CUDA /// host/device attributes. Returns false if the call is not allowed. /// /// Note: Will return true for CFP_WrongSide calls. These may appear in /// semantically correct CUDA programs, but only if they're never codegen'ed. - bool IsAllowedCUDACall(const FunctionDecl *Caller, - const FunctionDecl *Callee) { - return IdentifyCUDAPreference(Caller, Callee) != CFP_Never; + enum SkipCallerKind_t { SkipNoneCaller, SkipImplicitCaller }; + bool isCUDACallAllowed(const FunctionDecl *Callee, + SkipCallerKind_t Kind = SkipNoneCaller) { + // Skip contexts where no real call could be performed. + if (!CurContext->isFileContext() && !CurContext->isFunctionOrMethod()) + return true; + if (const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext)) + if (Kind == SkipImplicitCaller && Caller->isImplicit()) + return true; + return IdentifyCUDAPreference(getCUDAContextDecl(), Callee) != CFP_Never; } /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD, @@ -11461,10 +11495,9 @@ void CUDASetLambdaAttrs(CXXMethodDecl *Method); /// Finds a function in \p Matches with highest calling priority - /// from \p Caller context and erases all functions with lower + /// from the current context and erases all functions with lower /// calling priority. 
void EraseUnwantedCUDAMatches( - const FunctionDecl *Caller, SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches); /// Given a implicit special member, infer its CUDA target from the diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2342,6 +2342,8 @@ } } + Actions.pushCUDANonLocalVariable(ThisDecl); + // Parse declarator '=' initializer. // If a '==' or '+=' is found, suggest a fixit to '='. if (isTokenEqualOrEqualTypo()) { @@ -2474,6 +2476,8 @@ Actions.ActOnUninitializedDecl(ThisDecl); } + Actions.popCUDANonLocalVariable(ThisDecl); + Actions.FinalizeDeclaration(ThisDecl); return ThisDecl; diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -95,7 +95,7 @@ } template <typename A> -static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { +static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) { return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { return isa<A>(Attribute) && !(IgnoreImplicitAttr && Attribute->isImplicit()); @@ -130,6 +130,41 @@ return CFT_Host; } +Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const VarDecl *D, + bool IgnoreImplicitHDAttr) { + if (D == nullptr) + return CFT_Host; + + assert(D->hasGlobalStorage() && "Only non-local variable needs identifying."); + + if (D->hasAttr<CUDAInvalidTargetAttr>()) + return CFT_InvalidTarget; + + if (hasAttr<HIPPinnedShadowAttr>(D, IgnoreImplicitHDAttr)) + return CFT_Host; + + if (hasAttr<CUDAConstantAttr>(D, IgnoreImplicitHDAttr) || + hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr) || + hasAttr<CUDASharedAttr>(D, IgnoreImplicitHDAttr)) + return CFT_Device; + + return CFT_Host; +} + +Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const Decl *D, + bool IgnoreImplicitHDAttr) { + if (D == nullptr) + return CFT_Host; + + if (auto FD = dyn_cast<FunctionDecl>(D)) + return IdentifyCUDATarget(FD, IgnoreImplicitHDAttr); + + if (auto VD = dyn_cast<VarDecl>(D)) + return IdentifyCUDATarget(VD, IgnoreImplicitHDAttr); + + 
llvm_unreachable("Unexpected decl for CUDA target identification."); +} + // * CUDA Call preference table // // F - from, @@ -159,10 +194,10 @@ // | hd | hd | HD | HD | (b) | Sema::CUDAFunctionPreference -Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, +Sema::IdentifyCUDAPreference(const Decl *ContextDecl, const FunctionDecl *Callee) { assert(Callee && "Callee must be valid."); - CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); + CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(ContextDecl); CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); // If one of the targets is invalid, the check always fails, no matter what @@ -211,16 +246,17 @@ } void Sema::EraseUnwantedCUDAMatches( - const FunctionDecl *Caller, SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { if (Matches.size() <= 1) return; using Pair = std::pair<DeclAccessPair, FunctionDecl *>; - // Gets the CUDA function preference for a call from Caller to Match. + const Decl *ContextDecl = getCUDAContextDecl(); + + // Gets the CUDA function preference for a call from call context to Match. auto GetCFP = [&](const Pair &Match) { - return IdentifyCUDAPreference(Caller, Match.second); + return IdentifyCUDAPreference(ContextDecl, Match.second); }; // Find the best call preference among the functions in Matches. diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -16691,6 +16691,20 @@ return false; } +void Sema::pushCUDANonLocalVariable(const Decl *D) { + if (!D || D->isInvalidDecl() || !isNonlocalVariable(D)) + return; + CUDANonLocalVariableStack.push_back(D); +} + +void Sema::popCUDANonLocalVariable(const Decl *D) { + if (!D || D->isInvalidDecl() || !isNonlocalVariable(D)) + return; + assert(!CUDANonLocalVariableStack.empty() && + CUDANonLocalVariableStack.back() == D); + CUDANonLocalVariableStack.pop_back(); +} + /// Invoked when we are about to parse an initializer for the declaration /// 'Dcl'. 
/// diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1479,9 +1479,9 @@ bool Sema::isUsualDeallocationFunction(const CXXMethodDecl *Method) { // [CUDA] Ignore this function, if we can't call it. - const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); + const Decl *ContextDecl = getCUDAContextDecl(); if (getLangOpts().CUDA && - IdentifyCUDAPreference(Caller, Method) <= CFP_WrongSide) + IdentifyCUDAPreference(ContextDecl, Method) <= CFP_WrongSide) return false; SmallVector<const FunctionDecl *, 4> PreventedBy; @@ -1495,7 +1495,7 @@ return llvm::none_of(PreventedBy, [&](const FunctionDecl *FD) { assert(FD->getNumParams() == 1 && "Only single-operand functions should be in PreventedBy"); - return IdentifyCUDAPreference(Caller, FD) >= CFP_HostDevice; + return IdentifyCUDAPreference(ContextDecl, FD) >= CFP_HostDevice; }); } @@ -1558,8 +1558,7 @@ // In CUDA, determine how much we'd like / dislike to call this. if (S.getLangOpts().CUDA) - if (auto *Caller = dyn_cast<FunctionDecl>(S.CurContext)) - CUDAPref = S.IdentifyCUDAPreference(Caller, FD); + CUDAPref = S.IdentifyCUDAPreference(S.getCUDAContextDecl(), FD); } explicit operator bool() const { return FD; } @@ -2600,7 +2599,7 @@ } if (getLangOpts().CUDA) - EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches); + EraseUnwantedCUDAMatches(Matches); } else { // C++1y [expr.new]p22: // For a non-placement allocation function, the normal deallocation diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -6300,17 +6300,12 @@ } // (CUDA B.1): Check for invalid calls between targets. 
- if (getLangOpts().CUDA - if (const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext)) - // Skip the check for callers that are implicit members, because in this - // case we may not yet know what the member's target is; the target is - // inferred for the member automatically, based on the bases and fields of - // the class. - if (!Caller->isImplicit() && !IsAllowedCUDACall(Caller, Function)) { - Candidate.Viable = false; - Candidate.FailureKind = ovl_fail_bad_target; - return; - } + if (getLangOpts().CUDA && + !isCUDACallAllowed(Function, Sema::SkipImplicitCaller)) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_bad_target; + return; + } if (Function->getTrailingRequiresClause()) { ConstraintSatisfaction Satisfaction; @@ -6821,13 +6816,11 @@ } // (CUDA B.1): Check for invalid calls between targets. - if (getLangOpts().CUDA) - if (const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext)) - if (!IsAllowedCUDACall(Caller, Method)) { - Candidate.Viable = false; - Candidate.FailureKind = ovl_fail_bad_target; - return; - } + if (getLangOpts().CUDA && !isCUDACallAllowed(Method)) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_bad_target; + return; + } if (Method->getTrailingRequiresClause()) { ConstraintSatisfaction Satisfaction; @@ -9665,9 +9658,9 @@ } if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { - FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext); - return S.IdentifyCUDAPreference(Caller, Cand1.Function) > - S.IdentifyCUDAPreference(Caller, Cand2.Function); + const Decl *ContextDecl = S.getCUDAContextDecl(); + return S.IdentifyCUDAPreference(ContextDecl, Cand1.Function) > + S.IdentifyCUDAPreference(ContextDecl, Cand2.Function); } bool HasPS1 = Cand1.Function != nullptr && @@ -9771,19 +9764,19 @@ // candidate call is WrongSide and the other is SameSide, we ignore // the WrongSide candidate. 
if (S.getLangOpts().CUDA) { - const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext); + const Decl *ContextDecl = S.getCUDAContextDecl(); bool ContainsSameSideCandidate = llvm::any_of(Candidates, [&](OverloadCandidate *Cand) { // Check viable function only. return Cand->Viable && Cand->Function && - S.IdentifyCUDAPreference(Caller, Cand->Function) == + S.IdentifyCUDAPreference(ContextDecl, Cand->Function) == Sema::CFP_SameSide; }); if (ContainsSameSideCandidate) { auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) { // Check viable function only to avoid unnecessary data copying/moving. return Cand->Viable && Cand->Function && - S.IdentifyCUDAPreference(Caller, Cand->Function) == + S.IdentifyCUDAPreference(ContextDecl, Cand->Function) == Sema::CFP_WrongSide; }; llvm::erase_if(Candidates, IsWrongSideCandidate); @@ -10770,10 +10763,10 @@ /// CUDA: diagnose an invalid call across targets. static void DiagnoseBadTarget(Sema &S, OverloadCandidate *Cand) { - FunctionDecl *Caller = cast<FunctionDecl>(S.CurContext); + const Decl *ContextDecl = S.getCUDAContextDecl(); FunctionDecl *Callee = Cand->Function; - Sema::CUDAFunctionTarget CallerTarget = S.IdentifyCUDATarget(Caller), + Sema::CUDAFunctionTarget CallerTarget = S.IdentifyCUDATarget(ContextDecl), CalleeTarget = S.IdentifyCUDATarget(Callee); std::string FnDesc; @@ -11809,10 +11802,9 @@ return false; if (FunctionDecl *FunDecl = dyn_cast<FunctionDecl>(Fn)) { - if (S.getLangOpts().CUDA) - if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) - if (!Caller->isImplicit() && !S.IsAllowedCUDACall(Caller, FunDecl)) - return false; + if (S.getLangOpts().CUDA && + !S.isCUDACallAllowed(FunDecl, Sema::SkipImplicitCaller)) + return false; if (FunDecl->isMultiVersion()) { const auto *TA = FunDecl->getAttr<TargetAttr>(); if (TA && !TA->isDefaultVersion()) @@ -11927,7 +11919,7 @@ } void EliminateSuboptimalCudaMatches() { - S.EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(S.CurContext), Matches); + S.EraseUnwantedCUDAMatches(Matches); } public: diff --git 
a/clang/test/SemaCUDA/function-overload.cu b/clang/test/SemaCUDA/function-overload.cu --- a/clang/test/SemaCUDA/function-overload.cu +++ b/clang/test/SemaCUDA/function-overload.cu @@ -214,8 +214,10 @@ // Test for address of overloaded function resolution in the global context. HostFnPtr fp_h = h; HostFnPtr fp_ch = ch; +#if !defined(__CUDA_ARCH__) CurrentFnPtr fp_dh = dh; CurrentFnPtr fp_cdh = cdh; +#endif GlobalFnPtr fp_g = g; @@ -419,3 +421,28 @@ int test_constexpr_overload(C2 &x, C2 &y) { return constexpr_overload(x, y); } + +__device__ float fn(int); +__host__ float fn(float); + +// Overload resolution in the global initialization should follow the same rule +// as the one in other places. That is, we prefer a callable function over a +// non-callable function with a better signature match. In this test case, even +// though the device function has exactly matching with the integer argument, +// it can't be executed. +float gvar1 = fn(1); + +__device__ float dev_only_fn(int); +// expected-note@-1 {{candidate function not viable: call to __device__ function from __host__ function}} + +float gvar2 = dev_only_fn(1); // expected-error {{no matching function for call to 'dev_only_fn'}} + +#ifdef __CUDA_ARCH__ +__device__ DeviceReturnTy gvar3 = template_vs_function(1.f); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +__device__ int gvar4 = template_overload(1); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +#else +TemplateReturnTy gvar3 = template_vs_function(2.f); +int gvar4 = template_overload(1); +#endif diff --git a/clang/test/SemaCUDA/global-initializers-host.cu b/clang/test/SemaCUDA/global-initializers-host.cu --- a/clang/test/SemaCUDA/global-initializers-host.cu +++ b/clang/test/SemaCUDA/global-initializers-host.cu @@ -6,12 +6,14 @@ // module initializer. 
struct S { + // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}} + // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}} __device__ S() {} - // expected-note@-1 {{'S' declared here}} + // expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}} }; S s; -// expected-error@-1 {{reference to __device__ function 'S' in global initializer}} +// expected-error@-1 {{no matching constructor for initialization of 'S'}} struct T { __host__ __device__ T() {} @@ -19,14 +21,17 @@ T t; // No error, this is OK. struct U { + // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int' to 'const U' for 1st argument}} + // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int' to 'U' for 1st argument}} __host__ U() {} + // expected-note@-1 {{candidate constructor not viable: requires 0 arguments, but 1 was provided}} __device__ U(int) {} - // expected-note@-1 {{'U' declared here}} + // expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}} }; U u(42); -// expected-error@-1 {{reference to __device__ function 'U' in global initializer}} +// expected-error@-1 {{no matching constructor for initialization of 'U'}} __device__ int device_fn() { return 42; } -// expected-note@-1 {{'device_fn' declared here}} +// expected-note@-1 {{candidate function not viable: call to __device__ function from __host__ function}} int n = device_fn(); -// expected-error@-1 {{reference to __device__ function 'device_fn' in global initializer}} +// expected-error@-1 {{no matching function for call to 'device_fn'}} diff --git a/clang/test/SemaCUDA/hip-pinned-shadow.cu b/clang/test/SemaCUDA/hip-pinned-shadow.cu --- 
a/clang/test/SemaCUDA/hip-pinned-shadow.cu +++ b/clang/test/SemaCUDA/hip-pinned-shadow.cu @@ -13,13 +13,19 @@ template <class T, int texType, int hipTextureReadMode> struct texture : public textureReference { +// expected-note@-1{{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}} +// expected-note@-2{{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}} +// expected-note@-3{{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}} +// expected-note@-4{{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}} texture() { a = 1; } +// expected-note@-1{{candidate constructor not viable: call to __host__ function from __device__ function}} +// expected-note@-2{{candidate constructor not viable: call to __host__ function from __device__ function}} }; __hip_pinned_shadow__ texture<float, 2, 1> tex; __device__ __hip_pinned_shadow__ texture<float, 2, 1> tex2; // expected-error{{'hip_pinned_shadow' and 'device' attributes are not compatible}} - // expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables}} - // expected-note@-2{{conflicting attribute is here}} + // expected-note@-1{{conflicting attribute is here}} + // expected-error@-2{{no matching constructor for initialization of 'texture<float, 2, 1>'}} __constant__ __hip_pinned_shadow__ texture<float, 2, 1> tex3; // expected-error{{'hip_pinned_shadow' and 'constant' attributes are not compatible}} - // expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables}} - // expected-note@-2{{conflicting attribute is here}} + // expected-note@-1{{conflicting attribute is here}} + // expected-error@-2{{no matching constructor for initialization of 'texture<float, 2, 1>'}}