Index: clang/include/clang/AST/ASTContext.h =================================================================== --- clang/include/clang/AST/ASTContext.h +++ clang/include/clang/AST/ASTContext.h @@ -3289,11 +3289,11 @@ /// Return a new OMPTraitInfo object owned by this context. OMPTraitInfo &getNewOMPTraitInfo(); - /// Whether a C++ static variable may be externalized. - bool mayExternalizeStaticVar(const Decl *D) const; + /// Whether a C++ static variable or CUDA/HIP kernel may be externalized. + bool mayExternalize(const Decl *D) const; - /// Whether a C++ static variable should be externalized. - bool shouldExternalizeStaticVar(const Decl *D) const; + /// Whether a C++ static variable or CUDA/HIP kernel should be externalized. + bool shouldExternalize(const Decl *D) const; StringRef getCUIDHash() const; Index: clang/lib/AST/ASTContext.cpp =================================================================== --- clang/lib/AST/ASTContext.cpp +++ clang/lib/AST/ASTContext.cpp @@ -11329,7 +11329,7 @@ // name between the host and device compilation which is the same for the // same compilation unit whereas different among different compilation // units. - if (Context.shouldExternalizeStaticVar(D)) + if (Context.shouldExternalize(D)) return GVA_StrongExternal; } return L; @@ -12278,24 +12278,24 @@ return DB << "a prior #pragma section"; } -bool ASTContext::mayExternalizeStaticVar(const Decl *D) const { +bool ASTContext::mayExternalize(const Decl *D) const { bool IsStaticVar = isa(D) && cast(D)->getStorageClass() == SC_Static; - bool InAnonNS = D->isInAnonymousNamespace(); bool IsExplicitDeviceVar = (D->hasAttr() && !D->getAttr()->isImplicit()) || (D->hasAttr() && !D->getAttr()->isImplicit()); - bool IsKernel = D->hasAttr(); // CUDA/HIP: static managed variables need to be externalized since it is - // a declaration in IR, therefore cannot have internal linkage. - return IsStaticVar && - (D->hasAttr() || IsExplicitDeviceVar); + // a declaration in IR, therefore cannot have internal linkage. Kernels in + // anonymous name space needs to be externalized to avoid duplicate symbols. + return (IsStaticVar && + (D->hasAttr() || IsExplicitDeviceVar)) || + (D->hasAttr() && D->isInAnonymousNamespace()); } -bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const { - return mayExternalizeStaticVar(D) && - (D->hasAttr() || +bool ASTContext::shouldExternalize(const Decl *D) const { + return mayExternalize(D) && + (D->hasAttr() || D->hasAttr() || CUDADeviceVarODRUsedByHost.count(cast(D))); } Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -281,13 +281,13 @@ DeviceSideName = std::string(ND->getIdentifier()->getName()); // Make unique name for device side static file-scope variable for HIP. - if (CGM.getContext().shouldExternalizeStaticVar(ND) && + if (CGM.getContext().shouldExternalize(ND) && CGM.getLangOpts().GPURelocatableDeviceCode && !CGM.getLangOpts().CUID.empty()) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << DeviceSideName; - CGM.printPostfixForExternalizedStaticVar(Out); + CGM.printPostfixForExternalizedDecl(Out, ND); DeviceSideName = std::string(Out.str()); } return DeviceSideName; Index: clang/lib/CodeGen/CodeGenModule.h =================================================================== --- clang/lib/CodeGen/CodeGenModule.h +++ clang/lib/CodeGen/CodeGenModule.h @@ -1457,9 +1457,10 @@ TBAAAccessInfo *TBAAInfo = nullptr); bool stopAutoInit(); - /// Print the postfix for externalized static variable for single source - /// offloading languages CUDA and HIP. - void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const; + /// Print the postfix for externalized static variable or kernels for single + /// source offloading languages CUDA and HIP. + void printPostfixForExternalizedDecl(llvm::raw_ostream &OS, + const Decl *D) const; /// Helper functions for generating a NoLoop kernel /// For a captured statement, get the single For statement, if it exists, Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -1382,10 +1382,10 @@ } // Make unique name for device side static file-scope variable for HIP. - if (CGM.getContext().shouldExternalizeStaticVar(ND) && + if (CGM.getContext().shouldExternalize(ND) && CGM.getLangOpts().GPURelocatableDeviceCode && CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty()) - CGM.printPostfixForExternalizedStaticVar(Out); + CGM.printPostfixForExternalizedDecl(Out, ND); return std::string(Out.str()); } @@ -1452,8 +1452,7 @@ // static device variable depends on whether the variable is referenced by // a host or device host function. Therefore the mangled name cannot be // cached. - if (!LangOpts.CUDAIsDevice || - !getContext().mayExternalizeStaticVar(GD.getDecl())) { + if (!LangOpts.CUDAIsDevice || !getContext().mayExternalize(GD.getDecl())) { auto FoundName = MangledDeclNames.find(CanonicalGD); if (FoundName != MangledDeclNames.end()) return FoundName->second; @@ -1473,7 +1472,7 @@ // directly between host- and device-compilations, the host- and // device-mangling in host compilation could help catching certain ones. assert(!isa(ND) || !ND->hasAttr() || - getLangOpts().CUDAIsDevice || + getContext().shouldExternalize(ND) || getLangOpts().CUDAIsDevice || (getContext().getAuxTargetInfo() && (getContext().getAuxTargetInfo()->getCXXABI() != getContext().getTargetInfo().getCXXABI())) || @@ -6798,9 +6797,10 @@ return false; } -void CodeGenModule::printPostfixForExternalizedStaticVar( - llvm::raw_ostream &OS) const { - OS << "__static__" << getContext().getCUIDHash(); +void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS, + const Decl *D) const { + OS << (isa(D) ? "__static__" : ".anon.") + << getContext().getCUIDHash(); } namespace { Index: clang/test/CodeGenCUDA/kernel-in-anon-ns.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/kernel-in-anon-ns.cu @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ +// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \ +// RUN: -emit-llvm -o - -x hip %s > %t.dev + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ +// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \ +// RUN: -emit-llvm -o - -x hip %s > %t.host + +// RUN: cat %t.dev %t.host | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.anon\..*]]( +// CHECK: @[[STR:.*]] = {{.*}} c"[[KERN]]\00" +// CHECK: call i32 @__hipRegisterFunction({{.*}}@[[STR]] + +namespace { +__global__ void kernel() { +} +} + +void test() { + kernel<<<1, 1>>>(); +}