Index: clang/lib/AST/ASTContext.cpp =================================================================== --- clang/lib/AST/ASTContext.cpp +++ clang/lib/AST/ASTContext.cpp @@ -13602,16 +13602,17 @@ } bool ASTContext::mayExternalize(const Decl *D) const { - bool IsStaticVar = - isa(D) && cast(D)->getStorageClass() == SC_Static; + bool IsInternalVar = + isa(D) && + basicGVALinkageForVariable(*this, cast(D)) == GVA_Internal; bool IsExplicitDeviceVar = (D->hasAttr() && !D->getAttr()->isImplicit()) || (D->hasAttr() && !D->getAttr()->isImplicit()); - // CUDA/HIP: static managed variables need to be externalized since it is + // CUDA/HIP: managed variables need to be externalized since it is // a declaration in IR, therefore cannot have internal linkage. Kernels in // anonymous name space needs to be externalized to avoid duplicate symbols. - return (IsStaticVar && + return (IsInternalVar && (D->hasAttr() || IsExplicitDeviceVar)) || (D->hasAttr() && basicGVALinkageForFunction(*this, cast(D)) == Index: clang/test/CodeGenCUDA/anon-ns.cu =================================================================== --- clang/test/CodeGenCUDA/anon-ns.cu +++ clang/test/CodeGenCUDA/anon-ns.cu @@ -1,9 +1,9 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ -// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \ +// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++17 -fgpu-rdc \ // RUN: -emit-llvm -o - -x hip %s > %t.dev // RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ -// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \ +// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++17 -fgpu-rdc \ // RUN: -emit-llvm -o - -x hip %s > %t.host // RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s @@ -11,11 +11,11 @@ // RUN: echo "GPU binary" > %t.fatbin // RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \ -// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \ +// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++17 -fgpu-rdc \ // RUN: -emit-llvm -o - %s > %t.dev // RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ -// RUN: -aux-triple nvptx -std=c++11 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \ +// RUN: -aux-triple nvptx -std=c++17 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \ // RUN: -emit-llvm -o - %s > %t.host // RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s @@ -25,34 +25,62 @@ // HIP-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]]( // HIP-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]]( // HIP-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]]( +// HIP-DAG: @[[VAR1:_ZN12_GLOBAL__N_11AE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global +// HIP-DAG: @[[VAR2:_ZN12_GLOBAL__N_11BE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized global +// HIP-DAG: @[[VAR3:_Z7tempVarIN12_GLOBAL__N_11XEE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global // CUDA-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]]( // CUDA-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]]( // CUDA-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]]( +// CUDA-DAG: @[[VAR2:_ZN12_GLOBAL__N_11BE__static__b04fd23c98500190]] = addrspace(4) externally_initialized global +// CUDA-DAG: @[[VAR3:_Z7tempVarIN12_GLOBAL__N_11XEE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global + +// HIP-DAG: @llvm.compiler.used = {{.*}}@[[VAR1]]{{.*}}@[[VAR3]]{{.*}}@[[VAR2]] +// CUDA-DAG: @llvm.compiler.used = {{.*}}@[[VAR3]]{{.*}}@[[VAR2]] // COMMON-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00" // COMMON-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00" // COMMON-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00" +// HIP-DAG: @[[STR4:.*]] = {{.*}} c"[[VAR1]]\00" +// COMMON-DAG: @[[STR5:.*]] = {{.*}} c"[[VAR2]]\00" +// COMMON-DAG: @[[STR6:.*]] = {{.*}} c"[[VAR3]]\00" // COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR1]] // COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR2]] // COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR3]] - +// HIP-DAG: call void @__{{.*}}RegisterManagedVar({{.*}}@[[STR4]] +// COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[STR5]] +// COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[STR6]] template __global__ void tempKern(T x) {} +template +__device__ T tempVar; + namespace { __global__ void kernel() {} struct X {}; X x; auto lambda = [](){}; +#if __HIP__ + __managed__ int A = 1; +#endif + __constant__ int B = 2; } +template +void getSymbol(T *x) {} + void test() { kernel<<<1, 1>>>(); tempKern<<<1, 1>>>(x); tempKern<<<1, 1>>>(lambda); +#if __HIP__ + getSymbol(&A); +#endif + getSymbol(&B); + getSymbol(&tempVar); }