diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -43,6 +43,7 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/MapVector.h" @@ -999,6 +1000,9 @@ // Implicitly-declared type 'struct _GUID'. mutable TagDecl *MSGuidTagDecl = nullptr; + /// Keep track of CUDA/HIP static device variables referenced by host code. + llvm::DenseSet CUDAStaticDeviceVarReferencedByHost; + ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents, SelectorTable &sels, Builtin::Context &builtins); ASTContext(const ASTContext &) = delete; @@ -3030,6 +3034,9 @@ /// Return a new OMPTraitInfo object owned by this context. OMPTraitInfo &getNewOMPTraitInfo(); + /// Whether a C++ static variable should be externalized. + bool shouldExternalizeStaticVar(const Decl *D) const; + private: /// All OMPTraitInfo objects live in this collection, one per /// `pragma omp [begin] declare variant` directive. diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -10325,12 +10325,17 @@ } else if (D->hasAttr()) { if (L == GVA_DiscardableODR) return GVA_StrongODR; - } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice && - D->hasAttr()) { + } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) { // Device-side functions with __global__ attribute must always be // visible externally so they can be launched from host. - if (L == GVA_DiscardableODR || L == GVA_Internal) + if (D->hasAttr() && + (L == GVA_DiscardableODR || L == GVA_Internal)) return GVA_StrongODR; + // Single source offloading languages like CUDA/HIP need to be able to + // access static device variables from host code of the same compilation + // unit. This is done by externalizing the static variable. + if (Context.shouldExternalizeStaticVar(D)) + return GVA_StrongExternal; } return L; } @@ -11185,3 +11190,11 @@ return DB << Section.Decl; return DB << "a prior #pragma section"; } + +bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const { + return !getLangOpts().GPURelocatableDeviceCode && + (D->hasAttr() || D->hasAttr()) && + isa(D) && cast(D)->isFileVarDecl() && + cast(D)->getStorageClass() == SC_Static && + CUDAStaticDeviceVarReferencedByHost.count(cast(D)); +} diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -17864,6 +17864,25 @@ if (Var->isInvalidDecl()) return; + // Record a CUDA/HIP static device/constant variable if it is referenced + // by host code. This is done conservatively, when the variable is referenced + // in any of the following contexts: + // - a non-function context + // - a host function + // - a host device function + // This also requires the reference of the static device/constant variable by + // host code to be visible in the device compilation for the compiler to be + // able to externalize the static device/constant variable. + if ((Var->hasAttr() || Var->hasAttr()) && + Var->isFileVarDecl() && Var->getStorageClass() == SC_Static) { + auto *CurContext = SemaRef.CurContext; + if (!CurContext || !isa(CurContext) || + cast(CurContext)->hasAttr() || + (!cast(CurContext)->hasAttr() && + !cast(CurContext)->hasAttr())) + SemaRef.getASTContext().CUDAStaticDeviceVarReferencedByHost.insert(Var); + } + auto *MSI = Var->getMemberSpecializationInfo(); TemplateSpecializationKind TSK = MSI ? MSI->getTemplateSpecializationKind() : Var->getTemplateSpecializationKind(); diff --git a/clang/test/CodeGenCUDA/constexpr-variables.cu b/clang/test/CodeGenCUDA/constexpr-variables.cu --- a/clang/test/CodeGenCUDA/constexpr-variables.cu +++ b/clang/test/CodeGenCUDA/constexpr-variables.cu @@ -19,7 +19,7 @@ // CXX14: @_ZN1Q2k2E = {{.*}}externally_initialized constant i32 6 // CXX17: @_ZN1Q2k2E = internal {{.*}}constant i32 6 // CXX14: @_ZN1Q2k1E = available_externally {{.*}}constant i32 5 - // CXX17: @_ZN1Q2k1E = linkonce_odr {{.*}}constant i32 5 + // CXX17: @_ZN1Q2k1E = {{.*}} externally_initialized constant i32 5 static constexpr int k1 = 5; static constexpr int k2 = 6; }; @@ -30,14 +30,14 @@ template struct X { // CXX14: @_ZN1XIiE1aE = available_externally {{.*}}constant i32 123 - // CXX17: @_ZN1XIiE1aE = linkonce_odr {{.*}}constant i32 123 + // CXX17: @_ZN1XIiE1aE = {{.*}}externally_initialized constant i32 123 static constexpr int a = 123; }; __constant__ const int &use_X_a = X::a; template struct A { // CXX14: @_ZN1AIiLi1ELi2EE1xE = available_externally {{.*}}constant i32 2 - // CXX17: @_ZN1AIiLi1ELi2EE1xE = linkonce_odr {{.*}}constant i32 2 + // CXX17: @_ZN1AIiLi1ELi2EE1xE = {{.*}}externally_initialized constant i32 2 constexpr static T x = a * b; }; __constant__ const int &y = A::x; diff --git a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu @@ -0,0 +1,94 @@ +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST %s + +#include "Inputs/cuda.h" + +// Test function scope static device variable, which should not be externalized. +// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1 + +// Check a static device variable referenced by host function is externalized. +// DEV-DAG: @_ZL1x = addrspace(1) externally_initialized global i32 0 +// HOST-DAG: @_ZL1x = internal global i32 undef +// HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" + +static __device__ int x; + +// Check a static device variables referenced only by device functions and kernels +// is not externalized. +// DEV-DAG: @_ZL2x2 = internal addrspace(1) global i32 0 +static __device__ int x2; + +// Check a static device variable referenced by host device function is externalized. +// DEV-DAG: @_ZL2x3 = addrspace(1) externally_initialized global i32 0 +static __device__ int x3; + +// Check a static device variable referenced in file scope is externalized. +// DEV-DAG: @_ZL2x4 = addrspace(1) externally_initialized global i32 0 +static __device__ int x4; +int& x4_ref = x4; + +// Check a static device variable in anonymous namespace. +// DEV-DAG: @_ZN12_GLOBAL__N_12x5E = addrspace(1) externally_initialized global i32 0 +namespace { +static __device__ int x5; +} + +// Check a static constant variable referenced by host is externalized. +// DEV-DAG: @_ZL1y = addrspace(4) externally_initialized global i32 0 +// HOST-DAG: @_ZL1y = internal global i32 undef +// HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" + +static __constant__ int y; + +// Test static host variable, which should not be externalized nor registered. +// HOST-DAG: @_ZL1z = internal global i32 0 +// DEV-NOT: @_ZL1z +static int z; + +// Test static device variable in inline function, which should not be +// externalized nor registered. +// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat + +inline __device__ void devfun(const int ** b) { + const static int p = 2; + b[0] = &p; + b[1] = &x2; +} + +__global__ void kernel(int *a, const int **b) { + const static int w = 1; + a[0] = x; + a[1] = y; + a[2] = x2; + a[3] = x3; + a[4] = x4; + a[5] = x5; + b[0] = &w; + devfun(b); +} + +__host__ __device__ void hdf(int *a) { + a[0] = x3; +} + +int* getDeviceSymbol(int *x); + +void foo(int *a) { + getDeviceSymbol(&x); + getDeviceSymbol(&x5); + getDeviceSymbol(&y); + z = 123; +} + +// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] +// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p