diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -299,6 +299,10 @@ /// This is lazily created. This is intentionally not serialized. mutable llvm::StringMap StringLiteralCache; + /// MD5 hash of CUID. It is calculated when first used and cached by this + /// data member. + mutable std::string CUIDHash; + /// Representation of a "canonical" template template parameter that /// is used in canonical template names. class CanonicalTemplateTemplateParm : public llvm::FoldingSetNode { @@ -3117,6 +3121,8 @@ /// Whether a C++ static variable should be externalized. bool shouldExternalizeStaticVar(const Decl *D) const; + StringRef getCUIDHash() const; + private: /// All OMPTraitInfo objects live in this collection, one per /// `pragma omp [begin] declare variant` directive. diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -84,6 +84,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include @@ -10645,7 +10646,10 @@ return GVA_StrongODR; // Single source offloading languages like CUDA/HIP need to be able to // access static device variables from host code of the same compilation - // unit. This is done by externalizing the static variable. + // unit. This is done by externalizing the static variable with a shared + // name between the host and device compilation which is the same for the + // same compilation unit whereas different among different compilation + // units. if (Context.shouldExternalizeStaticVar(D)) return GVA_StrongExternal; } @@ -11533,10 +11537,8 @@ !D->getAttr()->isImplicit()); // CUDA/HIP: static managed variables need to be externalized since it is // a declaration in IR, therefore cannot have internal linkage. - // ToDo: externalize static variables for -fgpu-rdc. return IsStaticVar && - (D->hasAttr() || - (!getLangOpts().GPURelocatableDeviceCode && IsExplicitDeviceVar)); + (D->hasAttr() || IsExplicitDeviceVar); } bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const { @@ -11544,3 +11546,12 @@ (D->hasAttr() || CUDAStaticDeviceVarReferencedByHost.count(cast(D))); } + +StringRef ASTContext::getCUIDHash() const { + if (!CUIDHash.empty()) + return CUIDHash; + if (LangOpts.CUID.empty()) + return StringRef(); + CUIDHash = llvm::utohexstr(llvm::MD5Hash(LangOpts.CUID), /*LowerCase=*/true); + return CUIDHash; +} diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -255,6 +255,17 @@ DeviceSideName = std::string(Out.str()); } else DeviceSideName = std::string(ND->getIdentifier()->getName()); + + // Make unique name for device side static file-scope variable for HIP. + if (CGM.getContext().shouldExternalizeStaticVar(ND) && + CGM.getLangOpts().GPURelocatableDeviceCode && + !CGM.getLangOpts().CUID.empty()) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << DeviceSideName; + CGM.printPostfixForExternalizedStaticVar(Out); + DeviceSideName = std::string(Out.str()); + } return DeviceSideName; } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1422,6 +1422,10 @@ TBAAAccessInfo *TBAAInfo = nullptr); bool stopAutoInit(); + /// Print the postfix for externalized static variable for single source + /// offloading languages CUDA and HIP. + void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const; + private: llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1184,6 +1184,11 @@ } } + // Make unique name for device side static file-scope variable for HIP. + if (CGM.getContext().shouldExternalizeStaticVar(ND) && + CGM.getLangOpts().GPURelocatableDeviceCode && + CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty()) + CGM.printPostfixForExternalizedStaticVar(Out); return std::string(Out.str()); } @@ -1241,9 +1246,16 @@ } } - auto FoundName = MangledDeclNames.find(CanonicalGD); - if (FoundName != MangledDeclNames.end()) - return FoundName->second; + // In CUDA/HIP device compilation with -fgpu-rdc, the mangled name of a + // static device variable depends on whether the variable is referenced by + // a host or device host function. Therefore the mangled name cannot be + // cached. + if (!LangOpts.CUDAIsDevice || + !getContext().mayExternalizeStaticVar(GD.getDecl())) { + auto FoundName = MangledDeclNames.find(CanonicalGD); + if (FoundName != MangledDeclNames.end()) + return FoundName->second; + } // Keep the first result in the case of a mangling collision. const auto *ND = cast(GD.getDecl()); @@ -6249,3 +6261,8 @@ } return false; } + +void CodeGenModule::printPostfixForExternalizedStaticVar( + llvm::raw_ostream &OS) const { + OS << ".static." << getContext().getCUIDHash(); +} diff --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu --- a/clang/test/CodeGenCUDA/device-var-linkage.cu +++ b/clang/test/CodeGenCUDA/device-var-linkage.cu @@ -2,13 +2,13 @@ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,NORDC %s // RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s \ +// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,RDC %s // RUN: %clang_cc1 -triple nvptx \ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s // RUN: %clang_cc1 -triple nvptx \ -// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s \ +// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,RDC-H %s #include "Inputs/cuda.h" @@ -37,14 +37,15 @@ extern __managed__ int ev3; // NORDC-DAG: @_ZL3sv1 = dso_local addrspace(1) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv1 = internal addrspace(1) global i32 0 +// RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = dso_local addrspace(1) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv1 = internal global i32 undef static __device__ int sv1; // NORDC-DAG: @_ZL3sv2 = dso_local addrspace(4) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv2 = internal addrspace(4) global i32 0 +// RDC-DAG: @_ZL3sv2.static.[[HASH]] = dso_local addrspace(4) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv2 = internal global i32 undef static __constant__ int sv2; -// DEV-DAG: @_ZL3sv3 = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null +// NORDC-DAG: @_ZL3sv3 = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-DAG: @_ZL3sv3.static.[[HASH]] = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null static __managed__ int sv3; diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu --- a/clang/test/CodeGenCUDA/managed-var.cu +++ b/clang/test/CodeGenCUDA/managed-var.cu @@ -2,19 +2,24 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ // RUN: -emit-llvm -o - -x hip %s | FileCheck \ -// RUN: -check-prefixes=COMMON,DEV %s +// RUN: -check-prefixes=COMMON,DEV,NORDC-D %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ -// RUN: -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \ -// RUN: -check-prefixes=COMMON,DEV %s +// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.dev +// RUN: cat %t.dev | FileCheck -check-prefixes=COMMON,DEV,RDC-D %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ // RUN: -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=COMMON,HOST,NORDC %s // RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \ -// RUN: -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \ -// RUN: -check-prefixes=COMMON,HOST,RDC %s +// RUN: -emit-llvm -fgpu-rdc -cuid=abc -o - -x hip %s > %t.host +// RUN: cat %t.host | FileCheck -check-prefixes=COMMON,HOST,RDC %s + +// Check device and host compilation use the same postfix for static +// variable name. + +// RUN: cat %t.dev %t.host | FileCheck -check-prefix=POSTFIX %s #include "Inputs/cuda.h" @@ -45,10 +50,17 @@ // HOST-DAG: @ex = external externally_initialized global i32* extern __managed__ int ex; -// DEV-DAG: @_ZL2sx.managed = dso_local addrspace(1) externally_initialized global i32 1, align 4 -// DEV-DAG: @_ZL2sx = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null +// NORDC-D-DAG: @_ZL2sx.managed = dso_local addrspace(1) externally_initialized global i32 1, align 4 +// NORDC-D-DAG: @_ZL2sx = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-D-DAG: @_ZL2sx.static.[[HASH:.*]].managed = dso_local addrspace(1) externally_initialized global i32 1, align 4 +// RDC-D-DAG: @_ZL2sx.static.[[HASH]] = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL2sx.managed = internal global i32 1 // HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null +// NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00" +// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:.*]]\00" + +// POSTFIX: @_ZL2sx.static.[[HASH:.*]] = dso_local addrspace(1) externally_initialized global i32 addrspace(1)* null +// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH]]\00" static __managed__ int sx = 1; // DEV-DAG: @llvm.compiler.used @@ -154,6 +166,6 @@ } // HOST-DAG: __hipRegisterManagedVar({{.*}}@x {{.*}}@x.managed {{.*}}@[[DEVNAMEX]]{{.*}}, i64 4, i32 4) -// HOST-DAG: __hipRegisterManagedVar({{.*}}@_ZL2sx {{.*}}@_ZL2sx.managed +// HOST-DAG: __hipRegisterManagedVar({{.*}}@_ZL2sx {{.*}}@_ZL2sx.managed {{.*}}@[[DEVNAMESX]] // HOST-NOT: __hipRegisterManagedVar({{.*}}@ex {{.*}}@ex.managed // HOST-DAG: declare void @__hipRegisterManagedVar(i8**, i8*, i8*, i8*, i64, i32) diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -0,0 +1,97 @@ +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV,INT-DEV %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST,INT-HOST %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev +// RUN: cat %t.dev | FileCheck -check-prefixes=DEV,EXT-DEV %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host +// RUN: cat %t.host | FileCheck -check-prefixes=HOST,EXT-HOST %s + +// Check host and device compilations use the same postfixes for static +// variable names. + +// RUN: cat %t.dev %t.host | FileCheck -check-prefix=POSTFIX %s + +#include "Inputs/cuda.h" + +// Test function scope static device variable, which should not be externalized. +// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1 + + +// HOST-DAG: @_ZL1x = internal global i32 undef +// HOST-DAG: @_ZL1y = internal global i32 undef + +// Test normal static device variables +// INT-DEV-DAG: @_ZL1x = dso_local addrspace(1) externally_initialized global i32 0 +// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" + +// Test externalized static device variables +// EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = dso_local addrspace(1) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:.*]]\00" + +// POSTFIX: @_ZL1x.static.[[HASH:.*]] = dso_local addrspace(1) externally_initialized global i32 0 +// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00" + +static __device__ int x; + +// Test static device variables not used by host code should not be externalized +// DEV-DAG: @_ZL2x2 = internal addrspace(1) global i32 0 + +static __device__ int x2; + +// Test normal static device variables +// INT-DEV-DAG: @_ZL1y = dso_local addrspace(4) externally_initialized global i32 0 +// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" + +// Test externalized static device variables +// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = dso_local addrspace(4) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00" + +static __constant__ int y; + +// Test static host variable, which should not be externalized nor registered. +// HOST-DAG: @_ZL1z = internal global i32 0 +// DEV-NOT: @_ZL1z +static int z; + +// Test static device variable in inline function, which should not be +// externalized nor registered. +// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat + +inline __device__ void devfun(const int ** b) { + const static int p = 2; + b[0] = &p; +} + +__global__ void kernel(int *a, const int **b) { + const static int w = 1; + a[0] = x; + a[1] = y; + b[0] = &w; + b[1] = &x2; + devfun(b); +} + +int* getDeviceSymbol(int *x); + +void foo() { + getDeviceSymbol(&x); + getDeviceSymbol(&y); + z = 123; +} + +// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] +// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] +// HOST-NOT: __hipRegisterVar({{.*}}@_ZL2x2 +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p diff --git a/clang/test/SemaCUDA/static-device-var.cu b/clang/test/SemaCUDA/static-device-var.cu new file mode 100644 --- /dev/null +++ b/clang/test/SemaCUDA/static-device-var.cu @@ -0,0 +1,50 @@ +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: -emit-llvm -o - %s -fsyntax-only -verify=dev + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -emit-llvm -o - %s -fsyntax-only -verify=host + +// Checks allowed usage of file-scope and function-scope static variables. + +// host-no-diagnostics + +#include "Inputs/cuda.h" + +// Checks static variables are allowed in device functions. + +__device__ void f1() { + const static int b = 123; + static int a; +} + +// Checks static variables are allowd in global functions. + +__global__ void k1() { + const static int b = 123; + static int a; +} + +// Checks static device and constant variables are allowed in device and +// host functions, and static host variables are not allowed in device +// functions. + +static __device__ int x; +static __constant__ int y; +static int z; + +__global__ void kernel(int *a) { + a[0] = x; + a[1] = y; + a[2] = z; + // dev-error@-1 {{reference to __host__ variable 'z' in __global__ function}} +} + +int* getDeviceSymbol(int *x); + +void foo() { + getDeviceSymbol(&x); + getDeviceSymbol(&y); +}