Index: clang/lib/AST/ASTContext.cpp =================================================================== --- clang/lib/AST/ASTContext.cpp +++ clang/lib/AST/ASTContext.cpp @@ -10560,7 +10560,10 @@ return GVA_StrongODR; // Single source offloading languages like CUDA/HIP need to be able to // access static device variables from host code of the same compilation - // unit. This is done by externalizing the static variable. + // unit. This is done by externalizing the static variable with a shared + // name between the host and device compilation which is the same for the + // same compilation unit whereas different among different compilation + // units. if (Context.shouldExternalizeStaticVar(D)) return GVA_StrongExternal; } @@ -11440,7 +11443,8 @@ } bool ASTContext::mayExternalizeStaticVar(const Decl *D) const { - return !getLangOpts().GPURelocatableDeviceCode && + return (!getLangOpts().CUID.empty() || + !getLangOpts().GPURelocatableDeviceCode) && ((D->hasAttr() && !D->getAttr()->isImplicit()) || (D->hasAttr() && Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -234,6 +234,16 @@ DeviceSideName = std::string(Out.str()); } else DeviceSideName = std::string(ND->getIdentifier()->getName()); + + // Make unique name for device side static file-scope variable for HIP. + if (CGM.getContext().shouldExternalizeStaticVar(ND) && + CGM.getLangOpts().GPURelocatableDeviceCode) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << DeviceSideName; + CGM.printPostfixForExternalizedStaticVar(Out); + DeviceSideName = std::string(Out.str()); + } return DeviceSideName; } Index: clang/lib/CodeGen/CodeGenModule.h =================================================================== --- clang/lib/CodeGen/CodeGenModule.h +++ clang/lib/CodeGen/CodeGenModule.h @@ -1416,6 +1416,10 @@ TBAAAccessInfo *TBAAInfo = nullptr); bool stopAutoInit(); + /// Print the postfix for externalized static variable for single source + /// offloading languages CUDA and HIP. + void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const; + private: llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -1181,6 +1181,11 @@ } } + // Make unique name for device side static file-scope variable for HIP. + if (CGM.getContext().shouldExternalizeStaticVar(ND) && + CGM.getLangOpts().GPURelocatableDeviceCode && + CGM.getLangOpts().CUDAIsDevice) + CGM.printPostfixForExternalizedStaticVar(Out); return std::string(Out.str()); } @@ -1238,9 +1243,16 @@ } } - auto FoundName = MangledDeclNames.find(CanonicalGD); - if (FoundName != MangledDeclNames.end()) - return FoundName->second; + // In CUDA/HIP device compilation with -fgpu-rdc, the mangled name of a + // static device variable depends on whether the variable is referenced by + // a host or device host function. Therefore the mangled name cannot be + // cached. + if (!LangOpts.CUDAIsDevice || + !getContext().mayExternalizeStaticVar(GD.getDecl())) { + auto FoundName = MangledDeclNames.find(CanonicalGD); + if (FoundName != MangledDeclNames.end()) + return FoundName->second; + } // Keep the first result in the case of a mangling collision. const auto *ND = cast(GD.getDecl()); @@ -2848,7 +2860,15 @@ DelayedCXXInitPosition[Global] = CXXGlobalInits.size(); CXXGlobalInits.push_back(nullptr); } - +#if 0 + // We need to decide whether to externalize a static variable after checking + // whether it is referenced in host code. + if (isa(Global) && getContext().mayExternalizeStaticVar( + cast(Global))) { + addDeferredDeclToEmit(GD); + return; + } +#endif StringRef MangledName = getMangledName(GD); if (GetGlobalValue(MangledName) != nullptr) { // The value has already been used and should therefore be emitted. @@ -6291,3 +6311,8 @@ } return false; } + +void CodeGenModule::printPostfixForExternalizedStaticVar( + llvm::raw_ostream &OS) const { + OS << ".static." << getLangOpts().CUID; +} Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -0,0 +1,89 @@ +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV,INT-DEV %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST,INT-HOST %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=123abc \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV,EXT-DEV %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=123abc \ +// RUN: -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST,EXT-HOST %s + +#include "Inputs/cuda.h" + +// Test function scope static device variable, which should not be externalized. +// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1 + + +// HOST-DAG: @_ZL1x = internal global i32 undef +// HOST-DAG: @_ZL1y = internal global i32 undef + +// Test normal static device variables +// INT-DEV-DAG: @_ZL1x = internal addrspace(1) global i32 0 +// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" + +// Test externalized static device variables +// EXT-DEV-DAG: @_ZL1x.static.123abc = {{.*}}addrspace(1) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.123abc\00" + +static __device__ int x; + +// Test static device variables not used by host code should not be externalized +// DEV-DAG: @_ZL2x2 = internal addrspace(1) global i32 0 + +static __device__ int x2; + +// Test normal static device variables +// INT-DEV-DAG: @_ZL1y = internal addrspace(4) global i32 0 +// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" + +// Test externalized static device variables +// EXT-DEV-DAG: @_ZL1y.static.123abc = {{.*}}addrspace(4) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.123abc\00" + +static __constant__ int y; + +// Test static host variable, which should not be externalized nor registered. +// HOST-DAG: @_ZL1z = internal global i32 0 +// DEV-NOT: @_ZL1z +static int z; + +// Test static device variable in inline function, which should not be +// externalized nor registered. +// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat + +inline __device__ void devfun(const int ** b) { + const static int p = 2; + b[0] = &p; +} + +__global__ void kernel(int *a, const int **b) { + const static int w = 1; + a[0] = x; + a[1] = y; + b[0] = &w; + b[1] = &x2; + devfun(b); +} + +int* getDeviceSymbol(int *x); + +void foo() { + getDeviceSymbol(&x); + getDeviceSymbol(&y); + z = 123; +} + +// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] +// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] +// HOST-NOT: __hipRegisterVar({{.*}}@_ZL2x2 +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p Index: clang/test/SemaCUDA/static-device-var.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/static-device-var.cu @@ -0,0 +1,37 @@ +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: -emit-llvm -o - %s -fsyntax-only -verify + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -emit-llvm -o - %s -fsyntax-only -verify + +// expected-no-diagnostics + +#include "Inputs/cuda.h" + +__device__ void f1() { + const static int b = 123; + static int a; +} + +__global__ void k1() { + const static int b = 123; + static int a; +} + +static __device__ int x; +static __constant__ int y; + +__global__ void kernel(int *a) { + a[0] = x; + a[1] = y; +} + +int* getDeviceSymbol(int *x); + +void foo() { + getDeviceSymbol(&x); + getDeviceSymbol(&y); +}