diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1012,6 +1012,14 @@ } } DelayedDiagnostics; + enum CUDAFunctionTarget { + CFT_Device, + CFT_Global, + CFT_Host, + CFT_HostDevice, + CFT_InvalidTarget + }; + /// A RAII object to temporarily push a declaration context. class ContextRAII { private: @@ -4751,8 +4759,13 @@ bool isValidPointerAttrType(QualType T, bool RefOkay = false); bool CheckRegparmAttr(const ParsedAttr &attr, unsigned &value); + + /// Check validaty of calling convention attribute \p attr. If \p FD + /// is not null pointer, use \p FD to determine the CUDA/HIP host/device + /// target. Otherwise, it is specified by \p CFT. bool CheckCallingConvAttr(const ParsedAttr &attr, CallingConv &CC, - const FunctionDecl *FD = nullptr); + const FunctionDecl *FD = nullptr, + CUDAFunctionTarget CFT = CFT_InvalidTarget); bool CheckAttrTarget(const ParsedAttr &CurrAttr); bool CheckAttrNoArgs(const ParsedAttr &CurrAttr); bool checkStringLiteralArgumentAttr(const AttributeCommonInfo &CI, @@ -13259,14 +13272,6 @@ void checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D = nullptr); - enum CUDAFunctionTarget { - CFT_Device, - CFT_Global, - CFT_Host, - CFT_HostDevice, - CFT_InvalidTarget - }; - /// Determines whether the given function is a CUDA device/host/kernel/etc. /// function. /// @@ -13285,6 +13290,29 @@ /// Determines whether the given variable is emitted on host or device side. CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D); + /// Defines kinds of CUDA global host/device context where a function may be + /// called. + enum CUDATargetContextKind { + CTCK_Unknown, /// Unknown context + CTCK_InitGlobalVar, /// Function called during global variable + /// initialization + }; + + /// Define the current global CUDA host/device context where a function may be + /// called. Only used when a function is called outside of any functions. + struct CUDATargetContext { + CUDAFunctionTarget Target = CFT_HostDevice; + CUDATargetContextKind Kind = CTCK_Unknown; + Decl *D = nullptr; + } CurCUDATargetCtx; + + struct CUDATargetContextRAII { + Sema &S; + CUDATargetContext SavedCtx; + CUDATargetContextRAII(Sema &S_, CUDATargetContextKind K, Decl *D); + ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; } + }; + /// Gets the CUDA target for the current context. CUDAFunctionTarget CurrentCUDATarget() { return IdentifyCUDATarget(dyn_cast(CurContext)); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2583,6 +2583,7 @@ } } + Sema::CUDATargetContextRAII X(Actions, Sema::CTCK_InitGlobalVar, ThisDecl); switch (TheInitKind) { // Parse declarator '=' initializer. case InitKind::Equal: { diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -105,19 +105,37 @@ } template -static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { +static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) { return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { return isa(Attribute) && !(IgnoreImplicitAttr && Attribute->isImplicit()); }); } +Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_, + CUDATargetContextKind K, + Decl *D) + : S(S_) { + SavedCtx = S.CurCUDATargetCtx; + assert(K == CTCK_InitGlobalVar); + auto *VD = dyn_cast_or_null(D); + if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) { + auto Target = CFT_Host; + if ((hasAttr(VD, /*IgnoreImplicit=*/true) && + !hasAttr(VD, /*IgnoreImplicit=*/true)) || + hasAttr(VD, /*IgnoreImplicit=*/true) || + hasAttr(VD, /*IgnoreImplicit=*/true)) + Target = CFT_Device; + S.CurCUDATargetCtx = {Target, K, VD}; + } +} + /// IdentifyCUDATarget - Determine the CUDA compilation target for this function Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, bool IgnoreImplicitHDAttr) { - // Code that lives outside a function is run on the host. + // Code that lives outside a function gets the target from CurCUDATargetCtx. if (D == nullptr) - return CFT_Host; + return CurCUDATargetCtx.Target; if (D->hasAttr()) return CFT_InvalidTarget; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5317,7 +5317,8 @@ } bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, - const FunctionDecl *FD) { + const FunctionDecl *FD, + CUDAFunctionTarget CFT) { if (Attrs.isInvalid()) return true; @@ -5416,7 +5417,8 @@ // on their host/device attributes. if (LangOpts.CUDA) { auto *Aux = Context.getAuxTargetInfo(); - auto CudaTarget = IdentifyCUDATarget(FD); + assert(FD || CFT != CFT_InvalidTarget); + auto CudaTarget = FD ? IdentifyCUDATarget(FD) : CFT; bool CheckHost = false, CheckDevice = false; switch (CudaTarget) { case CFT_HostDevice: diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -6699,17 +6699,19 @@ } // (CUDA B.1): Check for invalid calls between targets. - if (getLangOpts().CUDA) - if (const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true)) - // Skip the check for callers that are implicit members, because in this - // case we may not yet know what the member's target is; the target is - // inferred for the member automatically, based on the bases and fields of - // the class. - if (!Caller->isImplicit() && !IsAllowedCUDACall(Caller, Function)) { - Candidate.Viable = false; - Candidate.FailureKind = ovl_fail_bad_target; - return; - } + if (getLangOpts().CUDA) { + const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); + // Skip the check for callers that are implicit members, because in this + // case we may not yet know what the member's target is; the target is + // inferred for the member automatically, based on the bases and fields of + // the class. + if (!(Caller && Caller->isImplicit()) && + !IsAllowedCUDACall(Caller, Function)) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_bad_target; + return; + } + } if (Function->getTrailingRequiresClause()) { ConstraintSatisfaction Satisfaction; @@ -7221,12 +7223,11 @@ // (CUDA B.1): Check for invalid calls between targets. if (getLangOpts().CUDA) - if (const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true)) - if (!IsAllowedCUDACall(Caller, Method)) { - Candidate.Viable = false; - Candidate.FailureKind = ovl_fail_bad_target; - return; - } + if (!IsAllowedCUDACall(getCurFunctionDecl(/*AllowLambda=*/true), Method)) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_bad_target; + return; + } if (Method->getTrailingRequiresClause()) { ConstraintSatisfaction Satisfaction; @@ -12497,10 +12498,12 @@ return false; if (FunctionDecl *FunDecl = dyn_cast(Fn)) { - if (S.getLangOpts().CUDA) - if (FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true)) - if (!Caller->isImplicit() && !S.IsAllowedCUDACall(Caller, FunDecl)) - return false; + if (S.getLangOpts().CUDA) { + FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true); + if (!(Caller && Caller->isImplicit()) && + !S.IsAllowedCUDACall(Caller, FunDecl)) + return false; + } if (FunDecl->isMultiVersion()) { const auto *TA = FunDecl->getAttr(); if (TA && !TA->isDefaultVersion()) diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -4055,7 +4055,8 @@ // function type. We'll diagnose the failure to apply them in // handleFunctionTypeAttr. CallingConv CC; - if (!S.CheckCallingConvAttr(AL, CC) && + if (!S.CheckCallingConvAttr(AL, CC, /*FunctionDecl=*/nullptr, + S.IdentifyCUDATarget(D.getAttributes())) && (!FTI.isVariadic || supportsVariadicCall(CC))) { return CC; } diff --git a/clang/test/CodeGenCUDA/global-initializers.cu b/clang/test/CodeGenCUDA/global-initializers.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/global-initializers.cu @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 %s -triple x86_64-linux-unknown -emit-llvm -o - \ +// RUN: | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 %s -fcuda-is-device \ +// RUN: -emit-llvm -o - -triple nvptx64 \ +// RUN: -aux-triple x86_64-unknown-linux-gnu | FileCheck \ +// RUN: -check-prefix=DEV %s + +#include "Inputs/cuda.h" + +// Check host/device-based overloding resolution in global variable initializer. +double pow(double, double) { return 1.0; } + +__device__ double pow(double, int) { return 2.0; } + +// HOST-DAG: call {{.*}}double @_Z3powdd(double noundef 1.000000e+00, double noundef 1.000000e+00) +double X = pow(1.0, 1); + +constexpr double cpow(double, double) { return 11.0; } + +constexpr __device__ double cpow(double, int) { return 12.0; } + +// HOST-DAG: @CX = global double 1.100000e+01 +double CX = cpow(11.0, 1); + +// DEV-DAG: @CY = addrspace(1) externally_initialized global double 1.200000e+01 +__device__ double CY = cpow(12.0, 1); + +struct A { + double pow(double, double) { return 3.0; } + + __device__ double pow(double, int) { return 4.0; } +}; + +A a; + +// HOST-DAG: call {{.*}}double @_ZN1A3powEdd(ptr {{.*}}@a, double noundef 3.000000e+00, double noundef 1.000000e+00) +double AX = a.pow(3.0, 1); + +struct CA { + constexpr double cpow(double, double) const { return 13.0; } + + constexpr __device__ double cpow(double, int) const { return 14.0; } +}; + +const CA ca; + +// HOST-DAG: @CAX = global double 1.300000e+01 +double CAX = ca.cpow(13.0, 1); + +// DEV-DAG: @CAY = addrspace(1) externally_initialized global double 1.400000e+01 +__device__ double CAY = ca.cpow(14.0, 1); diff --git a/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu b/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu --- a/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu +++ b/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-pc-windows-msvc -fms-compatibility -fcuda-is-device -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fms-compatibility -fsyntax-only -verify %s __cdecl void hostf1(); __vectorcall void (*hostf2)() = hostf1; // expected-error {{cannot initialize a variable of type 'void ((*))() __attribute__((vectorcall))' with an lvalue of type 'void () __attribute__((cdecl))'}} diff --git a/clang/test/SemaCUDA/function-overload.cu b/clang/test/SemaCUDA/function-overload.cu --- a/clang/test/SemaCUDA/function-overload.cu +++ b/clang/test/SemaCUDA/function-overload.cu @@ -222,7 +222,13 @@ // Test for address of overloaded function resolution in the global context. HostFnPtr fp_h = h; HostFnPtr fp_ch = ch; +#if defined (__CUDA_ARCH__) +__device__ +#endif CurrentFnPtr fp_dh = dh; +#if defined (__CUDA_ARCH__) +__device__ +#endif CurrentFnPtr fp_cdh = cdh; GlobalFnPtr fp_g = g; diff --git a/clang/test/SemaCUDA/global-initializers-host.cu b/clang/test/SemaCUDA/global-initializers-host.cu deleted file mode 100644 --- a/clang/test/SemaCUDA/global-initializers-host.cu +++ /dev/null @@ -1,32 +0,0 @@ -// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-linux-unknown -fsyntax-only -o - -verify - -#include "Inputs/cuda.h" - -// Check that we get an error if we try to call a __device__ function from a -// module initializer. - -struct S { - __device__ S() {} - // expected-note@-1 {{'S' declared here}} -}; - -S s; -// expected-error@-1 {{reference to __device__ function 'S' in global initializer}} - -struct T { - __host__ __device__ T() {} -}; -T t; // No error, this is OK. - -struct U { - __host__ U() {} - __device__ U(int) {} - // expected-note@-1 {{'U' declared here}} -}; -U u(42); -// expected-error@-1 {{reference to __device__ function 'U' in global initializer}} - -__device__ int device_fn() { return 42; } -// expected-note@-1 {{'device_fn' declared here}} -int n = device_fn(); -// expected-error@-1 {{reference to __device__ function 'device_fn' in global initializer}} diff --git a/clang/test/SemaCUDA/global-initializers.cu b/clang/test/SemaCUDA/global-initializers.cu new file mode 100644 --- /dev/null +++ b/clang/test/SemaCUDA/global-initializers.cu @@ -0,0 +1,72 @@ +// RUN: %clang_cc1 %s -triple x86_64-linux-unknown -fsyntax-only -o - -verify +// RUN: %clang_cc1 %s -fcuda-is-device -triple nvptx -fsyntax-only -o - -verify + +#include "Inputs/cuda.h" + +// Check that we get an error if we try to call a __device__ function from a +// module initializer. + +struct S { + // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}} + // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}} + __device__ S() {} + // expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}} +}; + +S s; +// expected-error@-1 {{no matching constructor for initialization of 'S'}} + +struct T { + __host__ __device__ T() {} +}; +T t; // No error, this is OK. + +struct U { + // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int' to 'const U' for 1st argument}} + // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int' to 'U' for 1st argument}} + __host__ U() {} + // expected-note@-1 {{candidate constructor not viable: requires 0 arguments, but 1 was provided}} + __device__ U(int) {} + // expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}} +}; +U u(42); +// expected-error@-1 {{no matching constructor for initialization of 'U'}} + +__device__ int device_fn() { return 42; } +// expected-note@-1 {{candidate function not viable: call to __device__ function from __host__ function}} +int n = device_fn(); +// expected-error@-1 {{no matching function for call to 'device_fn'}} + +// Check host/device-based overloding resolution in global variable initializer. +double pow(double, double); + +__device__ double pow(double, int); + +double X = pow(1.0, 1); +__device__ double Y = pow(2.0, 2); // expected-error{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + +constexpr double cpow(double, double) { return 1.0; } + +constexpr __device__ double cpow(double, int) { return 2.0; } + +const double CX = cpow(1.0, 1); +const __device__ double CY = cpow(2.0, 2); + +struct A { + double pow(double, double); + + __device__ double pow(double, int); + + constexpr double cpow(double, double) const { return 1.0; } + + constexpr __device__ double cpow(double, int) const { return 1.0; } + +}; + +A a; +double AX = a.pow(1.0, 1); +__device__ double AY = a.pow(2.0, 2); // expected-error{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + +const A ca; +const double CAX = ca.cpow(1.0, 1); +const __device__ double CAY = ca.cpow(2.0, 2);