Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -12063,6 +12063,15 @@ bool IgnoreImplicitHDAttr = false); CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs); + enum CUDAVariableTarget { + CVT_Device, /// Emitted on device side with a shadow variable on host side + CVT_Host, /// Emitted on host side only + CVT_Both, /// Emitted on both sides with different addresses + CVT_Unified, /// Emitted as a unified address, e.g. managed variables + }; + /// Determines whether the given variable is emitted on host or device side. + CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D); + /// Gets the CUDA target for the current context. CUDAFunctionTarget CurrentCUDATarget() { return IdentifyCUDATarget(dyn_cast(CurContext)); Index: clang/lib/CodeGen/CGDeclCXX.cpp =================================================================== --- clang/lib/CodeGen/CGDeclCXX.cpp +++ clang/lib/CodeGen/CGDeclCXX.cpp @@ -644,7 +644,9 @@ Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL); } - if (getLangOpts().HIP) { + assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice || + getLangOpts().GPUAllowDeviceInit); + if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) { Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); Fn->addFnAttr("device-init"); } Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -2364,6 +2364,8 @@ } // Emit CUDA/HIP static device variables referenced by host code only. + // Note we should not clear CUDADeviceVarODRUsedByHost since it is still + // needed for further handling. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) for (const auto *V : getContext().CUDADeviceVarODRUsedByHost) DeferredDeclsToEmit.push_back(V); Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -26,6 +26,14 @@ #include "llvm/ADT/SmallVector.h" using namespace clang; +template static bool hasExplicitAttr(const VarDecl *D) { + if (!D) + return false; + if (auto *A = D->getAttr()) + return !A->isImplicit(); + return false; +} + void Sema::PushForceCUDAHostDevice() { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); ForceCUDAHostDeviceDepth++; @@ -133,6 +141,35 @@ return CFT_Host; } +/// IdentifyTarget - Determine the CUDA compilation target for this variable. +Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) { + if (Var->hasAttr()) + return CVT_Unified; + if (Var->isConstexpr() && !hasExplicitAttr(Var)) + return CVT_Both; + if (Var->hasAttr() || Var->hasAttr() || + Var->hasAttr() || + Var->getType()->isCUDADeviceBuiltinSurfaceType() || + Var->getType()->isCUDADeviceBuiltinTextureType()) + return CVT_Device; + // Function-scope static variable without explicit device or constant + // attribute are emitted + // - on both sides in host device functions + // - on device side in device or global functions + if (auto *FD = dyn_cast(Var->getDeclContext())) { + switch (IdentifyCUDATarget(FD)) { + case CFT_HostDevice: + return CVT_Both; + case CFT_Device: + case CFT_Global: + return CVT_Device; + default: + return CVT_Host; + } + } + return CVT_Host; +} + // * CUDA Call preference table // // F - from, @@ -637,7 +674,8 @@ void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) { if (getLangOpts().CUDAIsDevice && VD->isConstexpr() && - (VD->isFileVarDecl() || VD->isStaticDataMember())) { + (VD->isFileVarDecl() || VD->isStaticDataMember()) && + !VD->hasAttr()) { VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); } } Index: clang/lib/Sema/SemaDeclAttr.cpp =================================================================== --- clang/lib/Sema/SemaDeclAttr.cpp +++ clang/lib/Sema/SemaDeclAttr.cpp @@ -4410,6 +4410,13 @@ S.Diag(AL.getLoc(), diag::err_cuda_nonstatic_constdev); return; } + // constexpr variable may already get an implicit constant attr, which should + // be replaced by the explicit constant attr. + if (auto *A = D->getAttr()) { + if (!A->isImplicit()) + return; + D->dropAttr(); + } D->addAttr(::new (S.Context) CUDAConstantAttr(S.Context, AL)); } Index: clang/lib/Sema/SemaExpr.cpp =================================================================== --- clang/lib/Sema/SemaExpr.cpp +++ clang/lib/Sema/SemaExpr.cpp @@ -17133,28 +17133,20 @@ if (SemaRef.LangOpts.CUDA && Var && Var->hasGlobalStorage()) { auto *FD = dyn_cast_or_null(SemaRef.CurContext); - auto Target = SemaRef.IdentifyCUDATarget(FD); - auto IsEmittedOnDeviceSide = [](VarDecl *Var) { - if (Var->hasAttr() || Var->hasAttr() || - Var->hasAttr() || - Var->getType()->isCUDADeviceBuiltinSurfaceType() || - Var->getType()->isCUDADeviceBuiltinTextureType()) - return true; - // Function-scope static variable in device functions or kernels are - // emitted on device side. - if (auto *FD = dyn_cast(Var->getDeclContext())) { - return FD->hasAttr() || FD->hasAttr(); - } - return false; - }; - if (!IsEmittedOnDeviceSide(Var)) { + auto VarTarget = SemaRef.IdentifyCUDATarget(Var); + auto UserTarget = SemaRef.IdentifyCUDATarget(FD); + if (VarTarget == Sema::CVT_Host && + (UserTarget == Sema::CFT_Device || UserTarget == Sema::CFT_HostDevice || + UserTarget == Sema::CFT_Global)) { // Diagnose ODR-use of host global variables in device functions. // Reference of device global variables in host functions is allowed // through shadow variables therefore it is not diagnosed. if (SemaRef.LangOpts.CUDAIsDevice) SemaRef.targetDiag(Loc, diag::err_ref_bad_target) - << /*host*/ 2 << /*variable*/ 1 << Var << Target; - } else if ((Target == Sema::CFT_Host || Target == Sema::CFT_HostDevice) && + << /*host*/ 2 << /*variable*/ 1 << Var << UserTarget; + } else if (VarTarget == Sema::CVT_Device && + (UserTarget == Sema::CFT_Host || + UserTarget == Sema::CFT_HostDevice) && !Var->hasExternalStorage()) { // Record a CUDA/HIP device side variable if it is ODR-used // by host code. This is done conservatively, when the variable is Index: clang/test/AST/ast-dump-constant-var.cu =================================================================== --- /dev/null +++ clang/test/AST/ast-dump-constant-var.cu @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -std=c++14 -ast-dump -x hip %s | FileCheck -check-prefixes=CHECK,HOST %s +// RUN: %clang_cc1 -std=c++14 -ast-dump -fcuda-is-device -x hip %s | FileCheck -check-prefixes=CHECK,DEV %s + +#include "Inputs/cuda.h" + +// CHECK-LABEL: VarDecl {{.*}} m1 'int' +// CHECK-NEXT: CUDAConstantAttr {{.*}}cuda.h +__constant__ int m1; + +// CHECK-LABEL: VarDecl {{.*}} m2 'int' +// CHECK-NEXT: CUDAConstantAttr {{.*}}cuda.h +// CHECK-NOT: CUDAConstantAttr +__constant__ __constant__ int m2; + +// CHECK-LABEL: VarDecl {{.*}} m3 'const int' +// HOST-NOT: CUDAConstantAttr +// DEV-NOT: CUDAConstantAttr {{.*}}cuda.h +// DEV: CUDAConstantAttr {{.*}}Implicit +// DEV-NOT: CUDAConstantAttr {{.*}}cuda.h +constexpr int m3 = 1; + +// CHECK-LABEL: VarDecl {{.*}} m3a 'const int' +// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit +// CHECK: CUDAConstantAttr {{.*}}cuda.h +// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit +constexpr __constant__ int m3a = 2; + +// CHECK-LABEL: VarDecl {{.*}} m3b 'const int' +// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit +// CHECK: CUDAConstantAttr {{.*}}cuda.h +// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit +__constant__ constexpr int m3b = 3; Index: clang/test/CodeGenCUDA/host-used-device-var.cu =================================================================== --- clang/test/CodeGenCUDA/host-used-device-var.cu +++ clang/test/CodeGenCUDA/host-used-device-var.cu @@ -66,30 +66,148 @@ template __device__ func_t p_add_func = add_func; +// Check non-constant constexpr variables ODR-used by host code only is not emitted. +// DEV-NEG-NOT: constexpr_var1a +// DEV-NEG-NOT: constexpr_var1b +constexpr int constexpr_var1a = 1; +inline constexpr int constexpr_var1b = 1; + +// Check constant constexpr variables ODR-used by host code only. +// Non-inline constexpr variable has internal linkage, therefore it is not accessible by host and not kept. +// Inline constexpr variable has linkonce_ord linkage, therefore it can be accessed by host and kept. +// DEV-NEG-NOT: constexpr_var2a +// DEV-DAG: @constexpr_var2b = linkonce_odr addrspace(4) externally_initialized constant i32 2 +__constant__ constexpr int constexpr_var2a = 2; +inline __constant__ constexpr int constexpr_var2b = 2; + void use(func_t p); -void use(int *p); +__host__ __device__ void use(const int *p); +// Check static device variable in host function. +// DEV-DAG: @_ZZ4fun1vE11static_var1 = dso_local addrspace(1) externally_initialized global i32 3 void fun1() { + static __device__ int static_var1 = 3; use(&u1); use(&u2); use(&u3); use(&ext_var); use(&inline_var); use(p_add_func); + use(&constexpr_var1a); + use(&constexpr_var1b); + use(&constexpr_var2a); + use(&constexpr_var2b); + use(&static_var1); +} + +// Check static variable in host device function. +// DEV-DAG: @_ZZ4fun2vE11static_var2 = internal addrspace(1) global i32 4 +// DEV-DAG: @_ZZ4fun2vE11static_var3 = dso_local addrspace(1) global i32 4 +__host__ __device__ void fun2() { + static int static_var2 = 4; + static __device__ int static_var3 = 4; + use(&static_var2); + use(&static_var3); } __global__ void kern1(int **x) { *x = &u4; + fun2(); +} + +// Check static variables of lambda functions. + +// Lambda functions are implicit host device functions. +// Default static variables in lambda functions should be treated +// as host variables on host side, therefore should not be forced +// to be emitted on device. + +// DEV-DAG: @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2 = dso_local addrspace(1) externally_initialized global i32 5 +// DEV-NEG-NOT: @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var1 +namespace TestStaticVarInLambda { +class A { +public: + A(char *); +}; +void fun() { + (void) [](char *c) { + static A var1(c); + static __device__ int var2 = 5; + (void) var1; + (void) var2; + }; +} +} + +// Check implicit constant variable ODR-used by host code is not emitted. + +// AST contains instantiation of al, which triggers AST instantiation +// of x::al::am, which triggers AST instatiation of x::ap, +// which triggers AST instantiation of aw::c, which has type +// ar. ar has base class x which has member ah. x::ah is initialized +// with function pointer pointing to ar:as, which returns an object +// of type ou. The constexpr aw::c is an implicit constant variable +// which is ODR-used by host function x::ap. An incorrect implementation +// will force aw::c to be emitted on device side, which will trigger +// emit of x::as and further more ctor of ou and variable o. +// The ODR-use of aw::c in x::ap should be treated as a host variable +// instead of device variable. + +// DEV-NEG-NOT: _ZN16TestConstexprVar1oE +namespace TestConstexprVar { +char o; +class ou { +public: + ou(char) { __builtin_strlen(&o); } +}; +template < typename ao > struct aw { static constexpr ao c; }; +class x { +protected: + typedef ou (*y)(const x *); + constexpr x(y ag) : ah(ag) {} + template < bool * > struct ak; + template < typename > struct al { + static bool am; + static ak< &am > an; + }; + template < typename ao > static x ap() { (void)aw< ao >::c; return x(nullptr); } + y ah; +}; +template < typename ao > bool x::al< ao >::am(&ap< ao >); +class ar : x { +public: + constexpr ar() : x(as) {} + static ou as(const x *) { return 0; } + al< ar > av; +}; } // Check the exact list of variables to ensure @_ZL2u4 is not among them. -// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE {{[^@]*}} @_ZL2u3 {{[^@]*}} @inline_var {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5 +// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE +// DEV-SAME: {{^[^@]*}} @_ZL2u3 +// DEV-SAME: {{^[^@]*}} @_ZZ4fun1vE11static_var1 +// DEV-SAME: {{^[^@]*}} @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2 +// DEV-SAME: {{^[^@]*}} @constexpr_var2b +// DEV-SAME: {{^[^@]*}} @inline_var +// DEV-SAME: {{^[^@]*}} @u1 +// DEV-SAME: {{^[^@]*}} @u2 +// DEV-SAME: {{^[^@]*}} @u5 +// DEV-SAME: {{^[^@]*$}} // HOST-DAG: hipRegisterVar{{.*}}@u1 // HOST-DAG: hipRegisterVar{{.*}}@u2 // HOST-DAG: hipRegisterVar{{.*}}@_ZL2u3 +// HOST-DAG: hipRegisterVar{{.*}}@constexpr_var2b // HOST-DAG: hipRegisterVar{{.*}}@u5 // HOST-DAG: hipRegisterVar{{.*}}@inline_var // HOST-DAG: hipRegisterVar{{.*}}@_Z10p_add_funcIiE +// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun1vE11static_var1 +// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun2vE11static_var2 +// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun2vE11static_var3 +// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2 +// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var1 // HOST-NEG-NOT: hipRegisterVar{{.*}}@ext_var // HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZL2u4 +// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1a +// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1b +// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var2a Index: clang/test/SemaCUDA/static-device-var.cu =================================================================== --- clang/test/SemaCUDA/static-device-var.cu +++ clang/test/SemaCUDA/static-device-var.cu @@ -1,16 +1,14 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ -// RUN: -emit-llvm -o - %s -fsyntax-only -verify=dev +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -o - %s -fsyntax-only -verify=dev,com -// RUN: %clang_cc1 -triple x86_64-gnu-linux \ -// RUN: -emit-llvm -o - %s -fsyntax-only -verify=host +// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ +// RUN: -emit-llvm -o - %s -fsyntax-only -verify=host,com // Checks allowed usage of file-scope and function-scope static variables. -// host-no-diagnostics - #include "Inputs/cuda.h" // Checks static variables are allowed in device functions. @@ -42,6 +40,28 @@ // dev-error@-1 {{reference to __host__ variable 'z' in __global__ function}} } +// Check dynamic initialization of static device variable is not allowed. + +namespace TestStaticVarInLambda { +class A { +public: + A(char *); +}; +class B { +public: + __device__ B(char *); +}; +void fun() { + (void) [](char *c) { + static A var1(c); + static __device__ B var2(c); + // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + (void) var1; + (void) var2; + }; +} +} + int* getDeviceSymbol(int *x); void foo() {