Index: clang/include/clang/AST/ASTContext.h =================================================================== --- clang/include/clang/AST/ASTContext.h +++ clang/include/clang/AST/ASTContext.h @@ -3005,7 +3005,8 @@ } } - GVALinkage GetGVALinkageForFunction(const FunctionDecl *FD) const; + GVALinkage GetGVALinkageForFunction(const FunctionDecl *FD, + bool IgnoreCUDAGlobalAttr = false) const; GVALinkage GetGVALinkageForVariable(const VarDecl *VD); /// Determines if the decl can be CodeGen'ed or deserialized from PCH Index: clang/lib/AST/ASTContext.cpp =================================================================== --- clang/lib/AST/ASTContext.cpp +++ clang/lib/AST/ASTContext.cpp @@ -11307,8 +11307,9 @@ return GVA_DiscardableODR; } -static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context, - const Decl *D, GVALinkage L) { +static GVALinkage +adjustGVALinkageForAttributes(const ASTContext &Context, const Decl *D, + GVALinkage L, bool IgnoreCUDAGlobalAttr = false) { // See http://msdn.microsoft.com/en-us/library/xa0d9ste.aspx // dllexport/dllimport on inline functions. if (D->hasAttr()) { @@ -11317,7 +11318,8 @@ } else if (D->hasAttr()) { if (L == GVA_DiscardableODR) return GVA_StrongODR; - } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) { + } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice && + !IgnoreCUDAGlobalAttr) { // Device-side functions with __global__ attribute must always be // visible externally so they can be launched from host. if (D->hasAttr() && @@ -11360,10 +11362,14 @@ return L; } -GVALinkage ASTContext::GetGVALinkageForFunction(const FunctionDecl *FD) const { - return adjustGVALinkageForExternalDefinitionKind(*this, FD, - adjustGVALinkageForAttributes(*this, FD, - basicGVALinkageForFunction(*this, FD))); +GVALinkage +ASTContext::GetGVALinkageForFunction(const FunctionDecl *FD, + bool IgnoreCUDAGlobalAttr) const { + return adjustGVALinkageForExternalDefinitionKind( + *this, FD, + adjustGVALinkageForAttributes(*this, FD, + basicGVALinkageForFunction(*this, FD), + IgnoreCUDAGlobalAttr)); } static GVALinkage basicGVALinkageForVariable(const ASTContext &Context, @@ -12290,7 +12296,10 @@ // anonymous name space needs to be externalized to avoid duplicate symbols. return (IsStaticVar && (D->hasAttr() || IsExplicitDeviceVar)) || - (D->hasAttr() && D->isInAnonymousNamespace()); + (D->hasAttr() && + GetGVALinkageForFunction(cast(D), + /*IgnoreCUDAGlobalAttr=*/true) == + GVA_Internal); } bool ASTContext::shouldExternalize(const Decl *D) const { Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -6779,8 +6779,14 @@ void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS, const Decl *D) const { - OS << (isa(D) ? "__static__" : ".anon.") - << getContext().getCUIDHash(); + StringRef Tag; + // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers + // postfix beginning with '.' since the symbol name can be demangled. + if (LangOpts.HIP) + Tag = (isa(D) ? ".static." : ".intern."); + else + Tag = (isa(D) ? "__static__" : "__intern__"); + OS << Tag << getContext().getCUIDHash(); } namespace { Index: clang/test/CodeGenCUDA/device-var-linkage.cu =================================================================== --- clang/test/CodeGenCUDA/device-var-linkage.cu +++ clang/test/CodeGenCUDA/device-var-linkage.cu @@ -37,15 +37,15 @@ extern __managed__ int ev3; // NORDC-DAG: @_ZL3sv1 = addrspace(1) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv1 = internal global i32 undef static __device__ int sv1; // NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 +// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv2 = internal global i32 undef static __constant__ int sv2; // NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global i32 addrspace(1)* null -// RDC-DAG: @_ZL3sv3__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null static __managed__ int sv3; Index: clang/test/CodeGenCUDA/kernel-in-anon-ns.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-in-anon-ns.cu +++ clang/test/CodeGenCUDA/kernel-in-anon-ns.cu @@ -8,17 +8,38 @@ // RUN: cat %t.dev %t.host | FileCheck %s +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \ +// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=CUDA %s + #include "Inputs/cuda.h" -// CHECK: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.anon\.b04fd23c98500190]]( -// CHECK: @[[STR:.*]] = {{.*}} c"[[KERN]]\00" -// CHECK: call i32 @__hipRegisterFunction({{.*}}@[[STR]] +// CHECK-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]]( +// CHECK-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]]( +// CHECK-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]]( +// CHECK-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00" +// CHECK-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00" +// CHECK-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00" +// CHECK-DAG: call i32 @__hipRegisterFunction({{.*}}@[[STR1]] +// CHECK-DAG: call i32 @__hipRegisterFunction({{.*}}@[[STR2]] +// CHECK-DAG: call i32 @__hipRegisterFunction({{.*}}@[[STR3]] + +// CUDA: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]]( + +template +__global__ void tempKern(T x) {} namespace { -__global__ void kernel() { -} + __global__ void kernel() {} + struct X {}; + X x; + auto lambda = [](){}; } void test() { kernel<<<1, 1>>>(); + + tempKern<<<1, 1>>>(x); + + tempKern<<<1, 1>>>(lambda); } Index: clang/test/CodeGenCUDA/managed-var.cu =================================================================== --- clang/test/CodeGenCUDA/managed-var.cu +++ clang/test/CodeGenCUDA/managed-var.cu @@ -52,15 +52,15 @@ // NORDC-D-DAG: @_ZL2sx.managed = addrspace(1) externally_initialized global i32 1, align 4 // NORDC-D-DAG: @_ZL2sx = addrspace(1) externally_initialized global i32 addrspace(1)* null -// RDC-D-DAG: @_ZL2sx__static__[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4 -// RDC-D-DAG: @_ZL2sx__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-D-DAG: @_ZL2sx.static.[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4 +// RDC-D-DAG: @_ZL2sx.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL2sx.managed = internal global i32 1 // HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null // NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00" -// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH:.*]]\00" +// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:.*]]\00" -// POSTFIX: @_ZL2sx__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null -// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH]]\00" +// POSTFIX: @_ZL2sx.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH]]\00" static __managed__ int sx = 1; // DEV-DAG: @llvm.compiler.used Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu =================================================================== --- clang/test/CodeGenCUDA/static-device-var-rdc.cu +++ clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -40,6 +40,11 @@ // RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host // RUN: cat %t.host | FileCheck -check-prefix=HOST-NEG %s +// Check postfix for CUDA. + +// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device -cuid=abc \ +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - %s | FileCheck \ +// RUN: -check-prefixes=CUDA %s #include "Inputs/cuda.h" @@ -55,11 +60,12 @@ // INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" // Test externalized static device variables -// EXT-DEV-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH:.*]]\00" +// EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:.*]]\00" +// CUDA-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// POSTFIX: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH]]\00" +// POSTFIX: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00" static __device__ int x; @@ -73,8 +79,8 @@ // INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" // Test externalized static device variables -// EXT-DEV-DAG: @_ZL1y__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 -// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y__static__[[HASH]]\00" +// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00" static __constant__ int y;