Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -2364,9 +2364,28 @@ } // Emit CUDA/HIP static device variables referenced by host code only. + // Note we should not clear CUDADeviceVarODRUsedByHost since it is still + // needed for further handling. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) - for (const auto *V : getContext().CUDADeviceVarODRUsedByHost) - DeferredDeclsToEmit.push_back(V); + for (const auto *V : getContext().CUDADeviceVarODRUsedByHost) { + // ToDo: The user of ODR-used variables may not be emitted on host side. + // There needs a more accurate way to determine whether a device side + // variable ODR-used by host only should be emitted on device side. + // + // Currently we do it conservatively. However this does not work well + // with implicit constant variables since use of implicit constant + // variable in host function cannot be easily differentiated from + // use of the host variable with the same name. Therefore we do not + // force emit implicit constant variable. + auto HasImplicitConstantAttr = [](const VarDecl *Var) { + auto *A = Var->getAttr(); + if (!A) + return false; + return A->isImplicit(); + }; + if (!HasImplicitConstantAttr(V)) + DeferredDeclsToEmit.push_back(V); + } // Stop if we're out of both deferred vtables and deferred declarations. if (DeferredDeclsToEmit.empty()) Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -637,7 +637,8 @@ void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) { if (getLangOpts().CUDAIsDevice && VD->isConstexpr() && - (VD->isFileVarDecl() || VD->isStaticDataMember())) { + (VD->isFileVarDecl() || VD->isStaticDataMember()) && + !VD->hasAttr()) { VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); } } Index: clang/lib/Sema/SemaDeclAttr.cpp =================================================================== --- clang/lib/Sema/SemaDeclAttr.cpp +++ clang/lib/Sema/SemaDeclAttr.cpp @@ -4410,6 +4410,13 @@ S.Diag(AL.getLoc(), diag::err_cuda_nonstatic_constdev); return; } + // constexpr variable may already get an implicit constant attr, which should + // be replaced by the explicit constant attr. + if (auto *A = D->getAttr()) { + if (!A->isImplicit()) + return; + D->dropAttr(); + } D->addAttr(::new (S.Context) CUDAConstantAttr(S.Context, AL)); } Index: clang/test/AST/ast-dump-constant-var.cu =================================================================== --- /dev/null +++ clang/test/AST/ast-dump-constant-var.cu @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -std=c++14 -ast-dump -x hip %s | FileCheck -check-prefixes=CHECK,HOST %s +// RUN: %clang_cc1 -std=c++14 -ast-dump -fcuda-is-device -x hip %s | FileCheck -check-prefixes=CHECK,DEV %s + +#include "Inputs/cuda.h" + +// CHECK-LABEL: VarDecl {{.*}} m1 'int' +// CHECK-NEXT: CUDAConstantAttr {{.*}}cuda.h +__constant__ int m1; + +// CHECK-LABEL: VarDecl {{.*}} m2 'int' +// CHECK-NEXT: CUDAConstantAttr {{.*}}cuda.h +// CHECK-NOT: CUDAConstantAttr +__constant__ __constant__ int m2; + +// CHECK-LABEL: VarDecl {{.*}} m3 'const int' +// HOST-NOT: CUDAConstantAttr +// DEV-NOT: CUDAConstantAttr {{.*}}cuda.h +// DEV: CUDAConstantAttr {{.*}}Implicit +// DEV-NOT: CUDAConstantAttr {{.*}}cuda.h +constexpr int m3 = 1; + +// CHECK-LABEL: VarDecl {{.*}} m3a 'const int' +// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit +// CHECK: CUDAConstantAttr {{.*}}cuda.h +// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit +constexpr __constant__ int m3a = 2; + +// CHECK-LABEL: VarDecl {{.*}} m3b 'const int' +// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit +// CHECK: CUDAConstantAttr {{.*}}cuda.h +// CHECK-NOT: CUDAConstantAttr {{.*}}Implicit +__constant__ constexpr int m3b = 3; Index: clang/test/CodeGenCUDA/host-used-device-var.cu =================================================================== --- clang/test/CodeGenCUDA/host-used-device-var.cu +++ clang/test/CodeGenCUDA/host-used-device-var.cu @@ -66,8 +66,22 @@ template __device__ func_t p_add_func = add_func; +// Check non-constant constexpr variables ODR-used by host code only is not emitted. +// DEV-NEG-NOT: constexpr_var1a +// DEV-NEG-NOT: constexpr_var1b +constexpr int constexpr_var1a = 1; +inline constexpr int constexpr_var1b = 1; + +// Check constant constexpr variables ODR-used by host code only. +// Non-inline constexpr variable has internal linkage, therefore it is not accessible by host and not kept. +// Inline constexpr variable has linkonce_ord linkage, therefore it can be accessed by host and kept. +// DEV-NEG-NOT: constexpr_var2a +// DEV-DAG: @constexpr_var2b = linkonce_odr addrspace(4) externally_initialized constant i32 2 +__constant__ constexpr int constexpr_var2a = 2; +inline __constant__ constexpr int constexpr_var2b = 2; + void use(func_t p); -void use(int *p); +void use(const int *p); void fun1() { use(&u1); @@ -76,20 +90,58 @@ use(&ext_var); use(&inline_var); use(p_add_func); + use(&constexpr_var1a); + use(&constexpr_var1b); + use(&constexpr_var2a); + use(&constexpr_var2b); } __global__ void kern1(int **x) { *x = &u4; } +// Check implicit constant variable ODR-used by host code is not emitted. +// DEV-NEG-NOT: _ZN16TestConstexprVar1oE +namespace TestConstexprVar { +char o; +class ou { +public: + ou(char) { __builtin_strlen(&o); } +}; +template < typename ao > struct aw { static constexpr ao c; }; +class x { +protected: + typedef ou (*y)(const x *); + constexpr x(y ag) : ah(ag) {} + template < bool * > struct ak; + template < typename > struct al { + static bool am; + static ak< &am > an; + }; + template < typename ao > static x ap() { (void)aw< ao >::c; return x(nullptr); } + y ah; +}; +template < typename ao > bool x::al< ao >::am(&ap< ao >); +class ar : x { +public: + constexpr ar() : x(as) {} + static ou as(const x *) { return 0; } + al< ar > av; +}; +} + // Check the exact list of variables to ensure @_ZL2u4 is not among them. -// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE {{[^@]*}} @_ZL2u3 {{[^@]*}} @inline_var {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5 +// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE {{[^@]*}} @_ZL2u3 {{[^@]*}} @constexpr_var2b {{[^@]*}} @inline_var {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5 // HOST-DAG: hipRegisterVar{{.*}}@u1 // HOST-DAG: hipRegisterVar{{.*}}@u2 // HOST-DAG: hipRegisterVar{{.*}}@_ZL2u3 +// HOST-DAG: hipRegisterVar{{.*}}@constexpr_var2b // HOST-DAG: hipRegisterVar{{.*}}@u5 // HOST-DAG: hipRegisterVar{{.*}}@inline_var // HOST-DAG: hipRegisterVar{{.*}}@_Z10p_add_funcIiE // HOST-NEG-NOT: hipRegisterVar{{.*}}@ext_var // HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZL2u4 +// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1a +// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1b +// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var2a