diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2195,6 +2195,11 @@ assert(DeferredVTables.empty()); } + // Emit CUDA/HIP static device variables referenced by host code only. + if (getLangOpts().CUDA) + for (auto V : getContext().CUDAStaticDeviceVarReferencedByHost) + DeferredDeclsToEmit.push_back(V); + // Stop if we're out of both deferred vtables and deferred declarations. if (DeferredDeclsToEmit.empty()) return; diff --git a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu --- a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu @@ -63,6 +63,13 @@ // externalized nor registered. // DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat +// Check a static device variable referenced by host function only is externalized. +// DEV-DAG: @_ZL1w = addrspace(1) externally_initialized global i32 0 +// HOST-DAG: @_ZL1w = internal global i32 undef +// HOST-DAG: @[[DEVNAMEW:[0-9]+]] = {{.*}}c"_ZL1w\00" + +static __device__ int w; + inline __device__ void devfun(const int ** b) { const static int p = 2; b[0] = &p; @@ -92,11 +99,13 @@ getDeviceSymbol(&x); getDeviceSymbol(&x5); getDeviceSymbol(&y); + getDeviceSymbol(&w); z = 123; a[0] = &z2; } // HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] // HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] +// HOST: __hipRegisterVar({{.*}}@_ZL1w {{.*}}@[[DEVNAMEW]] // HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w // HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p