diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12298,7 +12298,9 @@ // anonymous name space needs to be externalized to avoid duplicate symbols. return (IsStaticVar && (D->hasAttr() || IsExplicitDeviceVar)) || - (D->hasAttr() && D->isInAnonymousNamespace()); + (D->hasAttr() && + basicGVALinkageForFunction(*this, cast(D)) == + GVA_Internal); } bool ASTContext::shouldExternalize(const Decl *D) const { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -6809,6 +6809,12 @@ void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS, const Decl *D) const { - OS << (isa(D) ? "__static__" : ".anon.") - << getContext().getCUIDHash(); + StringRef Tag; + // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers + // postfix beginning with '.' since the symbol name can be demangled. + if (LangOpts.HIP) + Tag = (isa(D) ? ".static." : ".intern."); + else + Tag = (isa(D) ? "__static__" : "__intern__"); + OS << Tag << getContext().getCUIDHash(); } diff --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu --- a/clang/test/CodeGenCUDA/device-var-linkage.cu +++ b/clang/test/CodeGenCUDA/device-var-linkage.cu @@ -1,15 +1,18 @@ -// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device \ +// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn -fcuda-is-device \ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,NORDC %s -// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device \ +// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn -fcuda-is-device \ // RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,RDC %s -// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx \ +// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-gnu-linux \ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s -// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx \ +// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-gnu-linux \ // RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,RDC-H %s +// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device \ +// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CUDA %s #include "Inputs/cuda.h" @@ -24,7 +27,9 @@ // DEV-DAG: @v3 = addrspace(1) externally_initialized global i32 addrspace(1)* null // NORDC-H-DAG: @v3 = internal externally_initialized global i32* null // RDC-H-DAG: @v3 = externally_initialized global i32* null +#if __HIP__ __managed__ int v3; +#endif // DEV-DAG: @ev1 = external addrspace(1) global i32 // HOST-DAG: @ev1 = external global i32 @@ -34,25 +39,35 @@ extern __constant__ int ev2; // DEV-DAG: @ev3 = external addrspace(1) externally_initialized global i32 addrspace(1)* // HOST-DAG: @ev3 = external externally_initialized global i32* +#if __HIP__ extern __managed__ int ev3; +#endif // NORDC-DAG: @_ZL3sv1 = addrspace(1) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv1 = internal global i32 undef +// CUDA-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 static __device__ int sv1; // NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 +// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv2 = internal global i32 undef +// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 static __constant__ int sv2; // NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global i32 addrspace(1)* null -// RDC-DAG: @_ZL3sv3__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null +#if __HIP__ static __managed__ int sv3; +#endif __device__ __host__ int work(int *x); __device__ __host__ int fun1() { - return work(&ev1) + work(&ev2) + work(&ev3) + work(&sv1) + work(&sv2) + work(&sv3); + return work(&ev1) + work(&ev2) + work(&sv1) + work(&sv2) +#if __HIP__ + + work(&ev3) + work(&sv3) +#endif + ; } // HOST: hipRegisterVar({{.*}}@v1 diff --git a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu --- a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu +++ b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu @@ -6,19 +6,53 @@ // RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \ // RUN: -emit-llvm -o - -x hip %s > %t.host -// RUN: cat %t.dev %t.host | FileCheck %s +// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s + +// RUN: echo "GPU binary" > %t.fatbin + +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \ +// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \ +// RUN: -emit-llvm -o - %s > %t.dev + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ +// RUN: -aux-triple nvptx -std=c++11 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \ +// RUN: -emit-llvm -o - %s > %t.host + +// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s #include "Inputs/cuda.h" -// CHECK: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.anon\.b04fd23c98500190]]( -// CHECK: @[[STR:.*]] = {{.*}} c"[[KERN]]\00" -// CHECK: call i32 @__hipRegisterFunction({{.*}}@[[STR]] +// HIP-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]]( +// HIP-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]]( +// HIP-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]]( + +// CUDA-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]]( +// CUDA-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]]( +// CUDA-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]]( + +// COMMON-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00" +// COMMON-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00" +// COMMON-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00" + +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR1]] +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR2]] +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR3]] + + +template +__global__ void tempKern(T x) {} namespace { -__global__ void kernel() { -} + __global__ void kernel() {} + struct X {}; + X x; + auto lambda = [](){}; } void test() { kernel<<<1, 1>>>(); + + tempKern<<<1, 1>>>(x); + + tempKern<<<1, 1>>>(lambda); } diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu --- a/clang/test/CodeGenCUDA/managed-var.cu +++ b/clang/test/CodeGenCUDA/managed-var.cu @@ -1,5 +1,3 @@ -// REQUIRES: x86-registered-target, amdgpu-registered-target - // RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ // RUN: -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=COMMON,DEV,NORDC-D %s @@ -52,15 +50,15 @@ // NORDC-D-DAG: @_ZL2sx.managed = addrspace(1) externally_initialized global i32 1, align 4 // NORDC-D-DAG: @_ZL2sx = addrspace(1) externally_initialized global i32 addrspace(1)* null -// RDC-D-DAG: @_ZL2sx__static__[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4 -// RDC-D-DAG: @_ZL2sx__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-D-DAG: @_ZL2sx.static.[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4 +// RDC-D-DAG: @_ZL2sx.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL2sx.managed = internal global i32 1 // HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null // NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00" -// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH:.*]]\00" +// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:.*]]\00" -// POSTFIX: @_ZL2sx__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null -// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH]]\00" +// POSTFIX: @_ZL2sx.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH]]\00" static __managed__ int sx = 1; // DEV-DAG: @llvm.compiler.used diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu --- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -40,6 +40,11 @@ // RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host // RUN: cat %t.host | FileCheck -check-prefix=HOST-NEG %s +// Check postfix for CUDA. + +// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device -cuid=abc \ +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - %s | FileCheck \ +// RUN: -check-prefixes=CUDA %s #include "Inputs/cuda.h" @@ -55,11 +60,12 @@ // INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" // Test externalized static device variables -// EXT-DEV-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH:.*]]\00" +// EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:.*]]\00" +// CUDA-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// POSTFIX: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH]]\00" +// POSTFIX: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00" static __device__ int x; @@ -73,8 +79,8 @@ // INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" // Test externalized static device variables -// EXT-DEV-DAG: @_ZL1y__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 -// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y__static__[[HASH]]\00" +// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00" static __constant__ int y;