Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -147,6 +147,9 @@ return CVT_Unified; if (Var->isConstexpr() && !hasExplicitAttr(Var)) return CVT_Both; + if (Var->getType().isConstQualified() && Var->hasAttr() && + !hasExplicitAttr(Var)) + return CVT_Both; if (Var->hasAttr() || Var->hasAttr() || Var->hasAttr() || Var->getType()->isCUDADeviceBuiltinSurfaceType() || @@ -549,47 +552,72 @@ return true; } +namespace { +enum CUDAInitializerCheckKind { + CICK_DeviceOrConstant, // Check initializer for device/constant variable + CICK_Shared, // Check initializer for shared variable +}; + +// Check whether a variable has an allowed initializer for a CUDA device side +// variable with global storage. \p VD may be a host variable to be checked for +// potential promotion to device side variable. +// +// CUDA/HIP allows only empty constructors as initializers for global +// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all +// __shared__ variables whether they are local or not (they all are implicitly +// static in CUDA). One exception is that CUDA allows constant initializers +// for __constant__ and __device__ variables. +bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD, + CUDAInitializerCheckKind CheckKind) { + assert(!VD->isInvalidDecl() && VD->hasGlobalStorage()); + const Expr *Init = VD->getInit(); + auto IsEmptyInit = [&](const Expr *Init) { + if (!Init) + return true; + if (const auto *CE = dyn_cast(Init)) { + return S.isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); + } + return false; + }; + // isConstantInitializer cannot be called with dependent value, therefore + // we skip checking dependent value here. This is OK since + // IsAllowedCUDAStaticInitializer is called again when the template is + // instantiated. + auto IsDependentOrConstantInit = [&](const Expr *Init) { + assert(Init); + return VD->getType()->isDependentType() || Init->isValueDependent() || + Init->isConstantInitializer(S.Context, + VD->getType()->isReferenceType()); + }; + auto HasEmptyDtor = [&](VarDecl *VD) { + if (const auto *RD = VD->getType()->getAsCXXRecordDecl()) + return S.isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); + return true; + }; + if (CheckKind == CICK_Shared) + return IsEmptyInit(Init) && HasEmptyDtor(VD); + return S.LangOpts.GPUAllowDeviceInit || + ((IsEmptyInit(Init) || IsDependentOrConstantInit(Init)) && + HasEmptyDtor(VD)); +} +} // namespace + void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage()) return; const Expr *Init = VD->getInit(); - if (VD->hasAttr() || VD->hasAttr() || - VD->hasAttr()) { - if (LangOpts.GPUAllowDeviceInit) + bool IsSharedVar = VD->hasAttr(); + bool IsDeviceOrConstantVar = + !IsSharedVar && + (VD->hasAttr() || VD->hasAttr()); + if (IsDeviceOrConstantVar || IsSharedVar) { + if (HasAllowedCUDADeviceStaticInitializer( + *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant)) return; - bool AllowedInit = false; - if (const CXXConstructExpr *CE = dyn_cast(Init)) - AllowedInit = - isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); - // We'll allow constant initializers even if it's a non-empty - // constructor according to CUDA rules. This deviates from NVCC, - // but allows us to handle things like constexpr constructors. - if (!AllowedInit && - (VD->hasAttr() || VD->hasAttr())) { - auto *Init = VD->getInit(); - // isConstantInitializer cannot be called with dependent value, therefore - // we skip checking dependent value here. This is OK since - // checkAllowedCUDAInitializer is called again when the template is - // instantiated. - AllowedInit = - VD->getType()->isDependentType() || Init->isValueDependent() || - Init->isConstantInitializer(Context, - VD->getType()->isReferenceType()); - } - - // Also make sure that destructor, if there is one, is empty. - if (AllowedInit) - if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl()) - AllowedInit = - isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); - - if (!AllowedInit) { - Diag(VD->getLocation(), VD->hasAttr() - ? diag::err_shared_var_init - : diag::err_dynamic_var_init) - << Init->getSourceRange(); - VD->setInvalidDecl(); - } + Diag(VD->getLocation(), + IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init) + << Init->getSourceRange(); + VD->setInvalidDecl(); } else { // This is a host-side global variable. Check that the initializer is // callable from the host side. @@ -673,9 +701,12 @@ } void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) { - if (getLangOpts().CUDAIsDevice && VD->isConstexpr() && + if (getLangOpts().CUDAIsDevice && !VD->hasAttr() && + !VD->hasAttr() && !VD->hasAttr() && (VD->isFileVarDecl() || VD->isStaticDataMember()) && - !VD->hasAttr()) { + (VD->isConstexpr() || (VD->getType().isConstQualified() && + HasAllowedCUDADeviceStaticInitializer( + *this, VD, CICK_DeviceOrConstant)))) { VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); } } Index: clang/lib/Sema/SemaDecl.cpp =================================================================== --- clang/lib/Sema/SemaDecl.cpp +++ clang/lib/Sema/SemaDecl.cpp @@ -12957,6 +12957,8 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { if (var->isInvalidDecl()) return; + MaybeAddCUDAConstantAttr(var); + if (getLangOpts().OpenCL) { // OpenCL v2.0 s6.12.5 - Every block variable declaration must have an // initialiser Index: clang/test/CodeGenCUDA/device-use-host-var.cu =================================================================== --- clang/test/CodeGenCUDA/device-use-host-var.cu +++ clang/test/CodeGenCUDA/device-use-host-var.cu @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -emit-llvm -o - -x hip %s | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa \ +// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s | FileCheck -check-prefix=NEG %s #include "Inputs/cuda.h" @@ -12,29 +14,49 @@ constexpr A constexpr_array[4] = {0, 0, 0, 3}; constexpr char constexpr_str[] = "abcd"; const int const_var = 4; +const A const_struct{5}; +const A const_array[] = {0, 0, 0, 6}; +const char const_str[] = "xyz"; // CHECK-DAG: @_ZL13constexpr_str.const = private unnamed_addr addrspace(4) constant [5 x i8] c"abcd\00" // CHECK-DAG: @_ZL13constexpr_var = internal addrspace(4) constant i32 1 // CHECK-DAG: @_ZL16constexpr_struct = internal addrspace(4) constant %struct.A { i32 2 } // CHECK-DAG: @_ZL15constexpr_array = internal addrspace(4) constant [4 x %struct.A] [%struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A { i32 3 }] -// CHECK-NOT: external +// CHECK-DAG: @_ZL9const_var = internal addrspace(4) constant i32 4 +// CHECK-DAG: @_ZL12const_struct = internal addrspace(4) constant %struct.A { i32 5 } +// CHECK-DAG: @_ZL11const_array = internal addrspace(4) constant [4 x %struct.A] [%struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A zeroinitializer, %struct.A { i32 6 }] +// CHECK-DAG: @_ZL9const_str = internal addrspace(4) constant [4 x i8] c"xyz\00" + +// NEG-NOT: external // CHECK-LABEL: define{{.*}}@_Z7dev_funPiPPKi // CHECK: store i32 1 // CHECK: store i32 2 // CHECK: store i32 3 -// CHECK: store i32 4 // CHECK: load i8, i8* getelementptr {{.*}} @_ZL13constexpr_str.const +// CHECK: store i32 4 +// CHECK: store i32 5 +// CHECK: store i32 6 +// CHECK: load i8, i8* getelementptr {{.*}} @_ZL9const_str // CHECK: store i32* {{.*}}@_ZL13constexpr_var // CHECK: store i32* getelementptr {{.*}} @_ZL16constexpr_struct // CHECK: store i32* getelementptr {{.*}} @_ZL15constexpr_array +// CHECK: store i32* {{.*}}@_ZL9const_var +// CHECK: store i32* getelementptr {{.*}} @_ZL12const_struct +// CHECK: store i32* getelementptr {{.*}} @_ZL11const_array __device__ void dev_fun(int *out, const int **out2) { *out = constexpr_var; *out = constexpr_struct.x; *out = constexpr_array[3].x; - *out = const_var; *out = constexpr_str[3]; + *out = const_var; + *out = const_struct.x; + *out = const_array[3].x; + *out = const_str[3]; *out2 = &constexpr_var; *out2 = &constexpr_struct.x; *out2 = &constexpr_array[3].x; + *out2 = &const_var; + *out2 = &const_struct.x; + *out2 = &const_array[3].x; } Index: clang/test/SemaCUDA/device-use-host-var.cu =================================================================== --- clang/test/SemaCUDA/device-use-host-var.cu +++ clang/test/SemaCUDA/device-use-host-var.cu @@ -5,6 +5,8 @@ #include "Inputs/cuda.h" +int func(); + struct A { int x; static int host_var; @@ -16,6 +18,19 @@ int host_var; } +// struct with non-empty ctor. +struct B1 { + int x; + B1() { x = 1; } +}; + +// struct with non-empty dtor. +struct B2 { + int x; + B2() {} + ~B2() { x = 0; } +}; + static int static_host_var; __device__ int global_dev_var; @@ -34,6 +49,17 @@ const A global_const_struct_var{1}; constexpr A global_constexpr_struct_var{1}; +// Check const host var initialized with non-empty ctor is not allowed in +// device function. +const B1 b1; + +// Check const host var having non-empty dtor is not allowed in device function. +const B2 b2; + +// Check const host var initialized by non-constant initializer is not allowed +// in device function. +const int b3 = func(); + template __global__ void kernel(F f) { f(); } // dev-note2 {{called by 'kernel<(lambda}} @@ -53,11 +79,14 @@ *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}} *out = global_const_var; *out = global_constexpr_var; + *out = b1.x; // dev-error {{reference to __host__ variable 'b1' in __device__ function}} + *out = b2.x; // dev-error {{reference to __host__ variable 'b2' in __device__ function}} + *out = b3; // dev-error {{reference to __host__ variable 'b3' in __device__ function}} global_host_var = 1; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}} // Check reference of non-constexpr host variables are not allowed. int &ref_host_var = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}} - const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __device__ function}} + const int &ref_const_var = global_const_var; const int &ref_constexpr_var = global_constexpr_var; *out = ref_host_var; *out = ref_constexpr_var; @@ -65,18 +94,18 @@ // Check access member of non-constexpr struct type host variable is not allowed. *out = global_host_struct_var.x; // dev-error {{reference to __host__ variable 'global_host_struct_var' in __device__ function}} - *out = global_const_struct_var.x; // dev-error {{reference to __host__ variable 'global_const_struct_var' in __device__ function}} + *out = global_const_struct_var.x; *out = global_constexpr_struct_var.x; global_host_struct_var.x = 1; // dev-error {{reference to __host__ variable 'global_host_struct_var' in __device__ function}} // Check address taking of non-constexpr host variables is not allowed. int *p = &global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __device__ function}} - const int *cp = &global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __device__ function}} + const int *cp = &global_const_var; const int *cp2 = &global_constexpr_var; // Check access elements of non-constexpr host array is not allowed. *out = global_host_array[1]; // dev-error {{reference to __host__ variable 'global_host_array' in __device__ function}} - *out = global_const_array[1]; // dev-error {{reference to __host__ variable 'global_const_array' in __device__ function}} + *out = global_const_array[1]; *out = global_constexpr_array[1]; // Check ODR-use of host variables in namespace is not allowed. @@ -103,7 +132,7 @@ int &ref_constant_var = global_constant_var; int &ref_shared_var = global_shared_var; const int &ref_constexpr_var = global_constexpr_var; - const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __global__ function}} + const int &ref_const_var = global_const_var; *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __global__ function}} *out = global_dev_var; @@ -126,7 +155,7 @@ int &ref_constant_var = global_constant_var; int &ref_shared_var = global_shared_var; const int &ref_constexpr_var = global_constexpr_var; - const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}} + const int &ref_const_var = global_const_var; *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}} *out = global_dev_var; @@ -173,7 +202,7 @@ int &ref_constant_var = global_constant_var; int &ref_shared_var = global_shared_var; const int &ref_constexpr_var = global_constexpr_var; - const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}} + const int &ref_const_var = global_const_var; *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}} // dev-error@-1 {{capture host variable 'out' by reference in device or host device lambda function}} @@ -199,7 +228,7 @@ int &ref_constant_var = global_constant_var; int &ref_shared_var = global_shared_var; const int &ref_constexpr_var = global_constexpr_var; - const int &ref_const_var = global_const_var; // dev-error {{reference to __host__ variable 'global_const_var' in __host__ __device__ function}} + const int &ref_const_var = global_const_var; *out = global_host_var; // dev-error {{reference to __host__ variable 'global_host_var' in __host__ __device__ function}} *out = global_dev_var;