diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8328,6 +8328,10 @@ def err_ref_bad_target : Error< "reference to %select{__device__|__global__|__host__|__host__ __device__}0 " "%select{function|variable}1 %2 in %select{__device__|__global__|__host__|__host__ __device__}3 function">; +def note_cuda_const_var_unpromoted : Note< + "const variable cannot be emitted on device side due to dynamic initialization">; +def note_cuda_host_var : Note< + "host variable declared here">; def err_ref_bad_target_global_initializer : Error< "reference to %select{__device__|__global__|__host__|__host__ __device__}0 " "function %1 in global initializer">; diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -147,6 +147,9 @@ return CVT_Unified; if (Var->isConstexpr() && !hasExplicitAttr(Var)) return CVT_Both; + if (Var->getType().isConstQualified() && Var->hasAttr() && + !hasExplicitAttr(Var)) + return CVT_Both; if (Var->hasAttr() || Var->hasAttr() || Var->hasAttr() || Var->getType()->isCUDADeviceBuiltinSurfaceType() || @@ -549,47 +552,78 @@ return true; } +namespace { +enum CUDAInitializerCheckKind { + CICK_DeviceOrConstant, // Check initializer for device/constant variable + CICK_Shared, // Check initializer for shared variable +}; + +bool IsDependentVar(VarDecl *VD) { + if (VD->getType()->isDependentType()) + return true; + if (const auto *Init = VD->getInit()) + return Init->isValueDependent(); + return false; +} + +// Check whether a variable has an allowed initializer for a CUDA device side +// variable with global storage. \p VD may be a host variable to be checked for +// potential promotion to device side variable. +// +// CUDA/HIP allows only empty constructors as initializers for global +// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all +// __shared__ variables whether they are local or not (they all are implicitly +// static in CUDA). One exception is that CUDA allows constant initializers +// for __constant__ and __device__ variables. +bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD, + CUDAInitializerCheckKind CheckKind) { + assert(!VD->isInvalidDecl() && VD->hasGlobalStorage()); + assert(!IsDependentVar(VD) && "do not check dependent var"); + const Expr *Init = VD->getInit(); + auto IsEmptyInit = [&](const Expr *Init) { + if (!Init) + return true; + if (const auto *CE = dyn_cast(Init)) { + return S.isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); + } + return false; + }; + auto IsConstantInit = [&](const Expr *Init) { + assert(Init); + return Init->isConstantInitializer(S.Context, + VD->getType()->isReferenceType()); + }; + auto HasEmptyDtor = [&](VarDecl *VD) { + if (const auto *RD = VD->getType()->getAsCXXRecordDecl()) + return S.isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); + return true; + }; + if (CheckKind == CICK_Shared) + return IsEmptyInit(Init) && HasEmptyDtor(VD); + return S.LangOpts.GPUAllowDeviceInit || + ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD)); +} +} // namespace + void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { - if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage()) + // Do not check dependent variables since the ctor/dtor/initializer are not + // determined. Do it after instantiation. + if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() || + IsDependentVar(VD)) return; const Expr *Init = VD->getInit(); - if (VD->hasAttr() || VD->hasAttr() || - VD->hasAttr()) { - if (LangOpts.GPUAllowDeviceInit) + bool IsSharedVar = VD->hasAttr(); + bool IsDeviceOrConstantVar = + !IsSharedVar && + (VD->hasAttr() || VD->hasAttr()); + if (IsDeviceOrConstantVar || IsSharedVar) { + if (HasAllowedCUDADeviceStaticInitializer( + *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant)) return; - bool AllowedInit = false; - if (const CXXConstructExpr *CE = dyn_cast(Init)) - AllowedInit = - isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); - // We'll allow constant initializers even if it's a non-empty - // constructor according to CUDA rules. This deviates from NVCC, - // but allows us to handle things like constexpr constructors. - if (!AllowedInit && - (VD->hasAttr() || VD->hasAttr())) { - auto *Init = VD->getInit(); - // isConstantInitializer cannot be called with dependent value, therefore - // we skip checking dependent value here. This is OK since - // checkAllowedCUDAInitializer is called again when the template is - // instantiated. - AllowedInit = - VD->getType()->isDependentType() || Init->isValueDependent() || - Init->isConstantInitializer(Context, - VD->getType()->isReferenceType()); - } - - // Also make sure that destructor, if there is one, is empty. - if (AllowedInit) - if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl()) - AllowedInit = - isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); - - if (!AllowedInit) { - Diag(VD->getLocation(), VD->hasAttr() - ? diag::err_shared_var_init - : diag::err_dynamic_var_init) - << Init->getSourceRange(); - VD->setInvalidDecl(); - } + Diag(VD->getLocation(), + IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init) + << Init->getSourceRange(); + VD->setInvalidDecl(); } else { // This is a host-side global variable. Check that the initializer is // callable from the host side. @@ -672,10 +706,19 @@ NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); } +// TODO: `__constant__` memory may be a limited resource for certain targets. +// A safeguard may be needed at the end of compilation pipeline if +// `__constant__` memory usage goes beyond limit. void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) { - if (getLangOpts().CUDAIsDevice && VD->isConstexpr() && + // Do not promote dependent variables since the cotr/dtor/initializer are + // not determined. Do it after instantiation. + if (getLangOpts().CUDAIsDevice && !VD->hasAttr() && + !VD->hasAttr() && !VD->hasAttr() && (VD->isFileVarDecl() || VD->isStaticDataMember()) && - !VD->hasAttr()) { + !IsDependentVar(VD) && + (VD->isConstexpr() || (VD->getType().isConstQualified() && + HasAllowedCUDADeviceStaticInitializer( + *this, VD, CICK_DeviceOrConstant)))) { VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); } } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -7230,7 +7230,6 @@ case ConstexprSpecKind::Constexpr: NewVD->setConstexpr(true); - MaybeAddCUDAConstantAttr(NewVD); // C++1z [dcl.spec.constexpr]p1: // A static data member declared with the constexpr specifier is // implicitly an inline variable. @@ -12996,6 +12995,8 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { if (var->isInvalidDecl()) return; + MaybeAddCUDAConstantAttr(var); + if (getLangOpts().OpenCL) { // OpenCL v2.0 s6.12.5 - Every block variable declaration must have an // initialiser diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -17177,9 +17177,14 @@ // Diagnose ODR-use of host global variables in device functions. // Reference of device global variables in host functions is allowed // through shadow variables therefore it is not diagnosed. - if (SemaRef.LangOpts.CUDAIsDevice) + if (SemaRef.LangOpts.CUDAIsDevice) { SemaRef.targetDiag(Loc, diag::err_ref_bad_target) << /*host*/ 2 << /*variable*/ 1 << Var << UserTarget; + SemaRef.targetDiag(Var->getLocation(), + Var->getType().isConstQualified() + ? diag::note_cuda_const_var_unpromoted + : diag::note_cuda_host_var); + } } else if (VarTarget == Sema::CVT_Device && (UserTarget == Sema::CFT_Host || UserTarget == Sema::CFT_HostDevice) && diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -5034,7 +5034,6 @@ NewVar->setCXXForRangeDecl(OldVar->isCXXForRangeDecl()); NewVar->setObjCForDecl(OldVar->isObjCForDecl()); NewVar->setConstexpr(OldVar->isConstexpr()); - MaybeAddCUDAConstantAttr(NewVar); NewVar->setInitCapture(OldVar->isInitCapture()); NewVar->setPreviousDeclInSameBlockScope( OldVar->isPreviousDeclInSameBlockScope()); diff --git a/clang/test/CodeGenCUDA/device-use-host-var.cu b/clang/test/CodeGenCUDA/device-use-host-var.cu --- a/clang/test/CodeGenCUDA/device-use-host-var.cu +++ b/clang/test/CodeGenCUDA/device-use-host-var.cu @@ -1,5 +1,7 @@ -// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa \ +// RUN: %clang_cc1 -std=c++14 -triple amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -emit-llvm -o - -x hip %s | FileCheck %s +// RUN: %clang_cc1 -std=c++14 -triple amdgcn-amd-amdhsa \ +// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s | FileCheck -check-prefix=NEG %s #include "Inputs/cuda.h" @@ -7,34 +9,98 @@ int x; }; +// Check the situation of B has empty ctor but B has non-empty ctor. +// Make sure const B variables are not promoted to constant variables. +template +struct B { + T x; + B() {} + B(T _x) { x = _x; } + static const B y; +}; + +template<> +struct B { + int x; + B() { x = 1; } + static const B y; +}; + +template +const B B::y; + +const B B::y; + +template +T temp_fun(T x) { + return B::y.x; +} + +// Check template variable with empty default ctor but non-empty initializer +// ctor is not promoted. +template +const B b = B(-1); + constexpr int constexpr_var = 1; constexpr A constexpr_struct{2}; constexpr A constexpr_array[4] = {0, 0, 0, 3}; constexpr char constexpr_str[] = "abcd"; const int const_var = 4; +const A const_struct{5}; +const A const_array[] = {0, 0, 0, 6}; +const char const_str[] = "xyz"; + +// Check const variables used by host only are not emitted. +const int var_host_only = 7; // CHECK-DAG: @_ZL13constexpr_str.const = private unnamed_addr addrspace(4) constant [5 x i8] c"abcd\00" // CHECK-DAG: @_ZL13constexpr_var = internal addrspace(4) constant i32 1 // CHECK-DAG: @_ZL16constexpr_struct = internal addrspace(4) constant %struct.A { i32 2 } // CHECK-DAG: @_ZL15constexpr_array = internal addrspace(4) constant [4 x %struct.A] [%struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A { i32 3 }] -// CHECK-NOT: external +// CHECK-DAG: @_ZL9const_var = internal addrspace(4) constant i32 4 +// CHECK-DAG: @_ZL12const_struct = internal addrspace(4) constant %struct.A { i32 5 } +// CHECK-DAG: @_ZL11const_array = internal addrspace(4) constant [4 x %struct.A] [%struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A { i32 6 }] +// CHECK-DAG: @_ZL9const_str = internal addrspace(4) constant [4 x i8] c"xyz\00" + +// NEG-NOT: @_ZN1BIiE1yE +// NEG-NOT: @_Z1bIdE +// NEG-NOT: @_ZL13var_host_only +// NEG-NOT: external // CHECK-LABEL: define{{.*}}@_Z7dev_funPiPPKi // CHECK: store i32 1 // CHECK: store i32 2 // CHECK: store i32 3 -// CHECK: store i32 4 // CHECK: load i8, i8* getelementptr {{.*}} @_ZL13constexpr_str.const +// CHECK: store i32 4 +// CHECK: store i32 5 +// CHECK: store i32 6 +// CHECK: load i8, i8* getelementptr {{.*}} @_ZL9const_str // CHECK: store i32* {{.*}}@_ZL13constexpr_var // CHECK: store i32* getelementptr {{.*}} @_ZL16constexpr_struct // CHECK: store i32* getelementptr {{.*}} @_ZL15constexpr_array +// CHECK: store i32* {{.*}}@_ZL9const_var +// CHECK: store i32* getelementptr {{.*}} @_ZL12const_struct +// CHECK: store i32* getelementptr {{.*}} @_ZL11const_array __device__ void dev_fun(int *out, const int **out2) { *out = constexpr_var; *out = constexpr_struct.x; *out = constexpr_array[3].x; - *out = const_var; *out = constexpr_str[3]; + *out = const_var; + *out = const_struct.x; + *out = const_array[3].x; + *out = const_str[3]; *out2 = &constexpr_var; *out2 = &constexpr_struct.x; *out2 = &constexpr_array[3].x; + *out2 = &const_var; + *out2 = &const_struct.x; + *out2 = &const_array[3].x; +} + +void fun() { + temp_fun(1); + (void) b; + (void) var_host_only; } diff --git a/clang/test/SemaCUDA/device-use-host-var.cu b/clang/test/SemaCUDA/device-use-host-var.cu --- a/clang/test/SemaCUDA/device-use-host-var.cu +++ b/clang/test/SemaCUDA/device-use-host-var.cu @@ -5,35 +5,61 @@ #include "Inputs/cuda.h" +int func(); + struct A { int x; static int host_var; }; -int A::host_var; +int A::host_var; // dev-note {{host variable declared here}} namespace X { - int host_var; + int host_var; // dev-note {{host variable declared here}} } -static int static_host_var; +// struct with non-empty ctor. +struct B1 { + int x; + B1() { x = 1; } +}; + +// struct with non-empty dtor. +struct B2 { + int x; + B2() {} + ~B2() { x = 0; } +}; + +static int static_host_var; // dev-note {{host variable declared here}} __device__ int global_dev_var; __constant__ int global_constant_var; __shared__ int global_shared_var; -int global_host_var; +int global_host_var; // dev-note 8{{host variable declared here}} const int global_const_var = 1; constexpr int global_constexpr_var = 1; -int global_host_array[2] = {1, 2}; +int global_host_array[2] = {1, 2}; // dev-note {{host variable declared here}} const int global_const_array[2] = {1, 2}; constexpr int global_constexpr_array[2] = {1, 2}; -A global_host_struct_var{1}; +A global_host_struct_var{1}; // dev-note 2{{host variable declared here}} const A global_const_struct_var{1}; constexpr A global_constexpr_struct_var{1}; +// Check const host var initialized with non-empty ctor is not allowed in +// device function. +const B1 b1; // dev-note {{const variable cannot be emitted on device side due to dynamic initialization}} + +// Check const host var having non-empty dtor is not allowed in device function. +const B2 b2; // dev-note {{const variable cannot be emitted on device side due to dynamic initialization}} + +// Check const host var initialized by non-constant initializer is not allowed +// in device function. +const int b3 = func(); // dev-note {{const variable cannot be emitted on device side due to dynamic initialization}} + template __global__ void kernel(F f) { f(); } // dev-note2 {{called by 'kernel<(lambda}} @@ -53,11 +79,14 @@ *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}} *out = global_const_var; *out = global_constexpr_var; + *out = b1.x; // dev-error {{reference to __host__ variable 'b1' in __device__ function}} + *out = b2.x; // dev-error {{reference to __host__ variable 'b2' in __device__ function}} + *out = b3; // dev-error {{reference to __host__ variable 'b3' in __device__ function}} global_host_var = 1; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}} // Check reference of non-constexpr host variables are not allowed. int &ref_host_var = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}} - const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __device__ function}} + const int &ref_const_var = global_const_var; const int &ref_constexpr_var = global_constexpr_var; *out = ref_host_var; *out = ref_constexpr_var; @@ -65,18 +94,18 @@ // Check access member of non-constexpr struct type host variable is not allowed. *out = global_host_struct_var.x; // dev-error {{reference to __host__ variable 'global_host_struct_var' in __device__ function}} - *out = global_const_struct_var.x; // dev-error {{reference to __host__ variable 'global_const_struct_var' in __device__ function}} + *out = global_const_struct_var.x; *out = global_constexpr_struct_var.x; global_host_struct_var.x = 1; // dev-error {{reference to __host__ variable 'global_host_struct_var' in __device__ function}} // Check address taking of non-constexpr host variables is not allowed. int *p = &global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}} - const int *cp = &global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __device__ function}} + const int *cp = &global_const_var; const int *cp2 = &global_constexpr_var; // Check access elements of non-constexpr host array is not allowed. *out = global_host_array[1]; // dev-error {{reference to __host__ variable 'global_host_array' in __device__ function}} - *out = global_const_array[1]; // dev-error {{reference to __host__ variable 'global_const_array' in __device__ function}} + *out = global_const_array[1]; *out = global_constexpr_array[1]; // Check ODR-use of host variables in namespace is not allowed. @@ -103,7 +132,7 @@ int &ref_constant_var = global_constant_var; int &ref_shared_var = global_shared_var; const int &ref_constexpr_var = global_constexpr_var; - const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __global__ function}} + const int &ref_const_var = global_const_var; *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __global__ function}} *out = global_dev_var; @@ -126,7 +155,7 @@ int &ref_constant_var = global_constant_var; int &ref_shared_var = global_shared_var; const int &ref_constexpr_var = global_constexpr_var; - const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}} + const int &ref_const_var = global_const_var; *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}} *out = global_dev_var; @@ -173,7 +202,7 @@ int &ref_constant_var = global_constant_var; int &ref_shared_var = global_shared_var; const int &ref_constexpr_var = global_constexpr_var; - const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}} + const int &ref_const_var = global_const_var; *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}} // dev-error@-1 {{capture host variable 'out' by reference in device or host device lambda function}} @@ -199,7 +228,7 @@ int &ref_constant_var = global_constant_var; int &ref_shared_var = global_shared_var; const int &ref_constexpr_var = global_constexpr_var; - const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}} + const int &ref_const_var = global_const_var; *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}} *out = global_dev_var; @@ -239,7 +268,7 @@ }; template<> -not_a_texture not_a_texture::ref; +not_a_texture not_a_texture::ref; // dev-note {{host variable declared here}} __device__ void test_not_a_texture() { not_a_texture inst; @@ -249,7 +278,7 @@ // Test static variable in host function used by device function. void test_static_var_host() { for (int i = 0; i < 10; i++) { - static int x; + static int x; // dev-note {{host variable declared here}} struct A { __device__ int f() { return x; // dev-error{{reference to __host__ variable 'x' in __device__ function}} diff --git a/clang/test/SemaCUDA/static-device-var.cu b/clang/test/SemaCUDA/static-device-var.cu --- a/clang/test/SemaCUDA/static-device-var.cu +++ b/clang/test/SemaCUDA/static-device-var.cu @@ -31,7 +31,7 @@ static __device__ int x; static __constant__ int y; -static int z; +static int z; // dev-note {{host variable declared here}} __global__ void kernel(int *a) { a[0] = x;