diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -1147,6 +1147,7 @@ Var->setAlignment(CGM.getPointerAlign().getAsAlign()); Var->setDSOLocal(F->isDSOLocal()); Var->setVisibility(F->getVisibility()); + CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var); KernelHandles[F] = Var; KernelStubs[Var] = F; return Var; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4308,11 +4308,6 @@ if (!CGM.supportsCOMDAT()) return false; - // Do not set COMDAT attribute for CUDA/HIP stub functions to prevent - // them being "merged" by the COMDAT Folding linker optimization. - if (D.hasAttr()) - return false; - if (D.hasAttr()) return true; diff --git a/clang/test/CodeGenCUDA/kernel-stub-name.cu b/clang/test/CodeGenCUDA/kernel-stub-name.cu --- a/clang/test/CodeGenCUDA/kernel-stub-name.cu +++ b/clang/test/CodeGenCUDA/kernel-stub-name.cu @@ -2,16 +2,35 @@ // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fcuda-include-gpubinary %t -o - -x hip\ -// RUN: | FileCheck %s +// RUN: | FileCheck -check-prefixes=CHECK,GNU %s + +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ +// RUN: -fcuda-include-gpubinary %t -o - -x hip\ +// RUN: | FileCheck -check-prefix=NEG %s + +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \ +// RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \ +// RUN: %t -o - -x hip\ +// RUN: | FileCheck -check-prefixes=CHECK,MSVC %s + +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \ +// RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \ +// RUN: %t -o - -x hip\ +// RUN: | FileCheck -check-prefix=NEG %s #include "Inputs/cuda.h" -// Kernel handles +// Check kernel handles are emitted for non-MSVC target but not for MSVC target. -// CHECK: @[[HCKERN:ckernel]] = constant void ()* @__device_stub__ckernel, align 8 -// CHECK: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @_ZN2ns23__device_stub__nskernelEv, align 8 -// CHECK: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @_Z25__device_stub__kernelfuncIiEvv, align 8 -// CHECK: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8 +// GNU: @[[HCKERN:ckernel]] = constant void ()* @[[CSTUB:__device_stub__ckernel]], align 8 +// GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8 +// GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8 +// GNU: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8 + +// MSVC: @[[HCKERN:ckernel]] = dso_local constant void ()* @[[CSTUB:__device_stub__ckernel]], align 8 +// MSVC: @[[HNSKERN:"\?nskernel@ns@@YAXXZ.*"]] = dso_local constant void ()* @[[NSSTUB:"\?nskernel@ns@@YAXXZ"]], align 8 +// MSVC: @[[HTKERN:"\?\?\$kernelfunc@H@@YAXXZ.*"]] = linkonce_odr dso_local constant void ()* @[[TSTUB:"\?\?\$kernelfunc@H@@YAXXZ.*"]], comdat, align 8 +// MSVC: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant void ()*, align 8 extern "C" __global__ void ckernel() {} @@ -24,10 +43,10 @@ __global__ void kernel_decl(); -void (*kernel_ptr)(); -void *void_ptr; +extern "C" void (*kernel_ptr)(); +extern "C" void *void_ptr; -void launch(void *kern); +extern "C" void launch(void *kern); // Device side kernel names @@ -37,21 +56,22 @@ // Non-template kernel stub functions -// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]] +// CHECK: define{{.*}}@[[CSTUB]] // CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]] -// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]] -// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]] +// CHECK: define{{.*}}@[[NSSTUB]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]] -// Check kernel stub is used for triple chevron +// Check kernel stub is called for triple chevron. -// CHECK-LABEL: define{{.*}}@_Z4fun1v() +// CHECK-LABEL: define{{.*}}@fun1() // CHECK: call void @[[CSTUB]]() // CHECK: call void @[[NSSTUB]]() -// CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]() -// CHECK: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]() +// CHECK: call void @[[TSTUB]]() +// GNU: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]() +// MSVC: call void @[[DSTUB:"\?kernel_decl@@YAXXZ"]]() -void fun1(void) { +extern "C" void fun1(void) { ckernel<<<1, 1>>>(); ns::nskernel<<<1, 1>>>(); kernelfunc<<<1, 1>>>(); @@ -67,28 +87,28 @@ // CHECK: declare{{.*}}@[[DSTUB]] -// Check kernel handle is used for passing the kernel as a function pointer +// Check kernel handle is used for passing the kernel as a function pointer. -// CHECK-LABEL: define{{.*}}@_Z4fun2v() -// CHECK: call void @_Z6launchPv({{.*}}[[HCKERN]] -// CHECK: call void @_Z6launchPv({{.*}}[[HNSKERN]] -// CHECK: call void @_Z6launchPv({{.*}}[[HTKERN]] -// CHECK: call void @_Z6launchPv({{.*}}[[HDKERN]] -void fun2() { +// CHECK-LABEL: define{{.*}}@fun2() +// CHECK: call void @launch({{.*}}[[HCKERN]] +// CHECK: call void @launch({{.*}}[[HNSKERN]] +// CHECK: call void @launch({{.*}}[[HTKERN]] +// CHECK: call void @launch({{.*}}[[HDKERN]] +extern "C" void fun2() { launch((void *)ckernel); launch((void *)ns::nskernel); launch((void *)kernelfunc); launch((void *)kernel_decl); } -// Check kernel handle is used for assigning a kernel to a function pointer +// Check kernel handle is used for assigning a kernel to a function pointer. -// CHECK-LABEL: define{{.*}}@_Z4fun3v() +// CHECK-LABEL: define{{.*}}@fun3() // CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8 // CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8 // CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8 // CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8 -void fun3() { +extern "C" void fun3() { kernel_ptr = ckernel; kernel_ptr = &ckernel; void_ptr = (void *)ckernel; @@ -96,34 +116,37 @@ } // Check kernel stub is loaded from kernel handle when function pointer is -// used with triple chevron +// used with triple chevron. -// CHECK-LABEL: define{{.*}}@_Z4fun4v() +// CHECK-LABEL: define{{.*}}@fun4() // CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr -// CHECK: call i32 @_Z16hipConfigureCall4dim3S_mP9hipStream +// CHECK: call i32 @{{.*hipConfigureCall}} // CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8 // CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to void ()** // CHECK: %[[STUB:.*]] = load void ()*, void ()** %[[CAST]], align 8 // CHECK: call void %[[STUB]]() -void fun4() { +extern "C" void fun4() { kernel_ptr = ckernel; kernel_ptr<<<1,1>>>(); } -// Check kernel handle is passed to a function +// Check kernel handle is passed to a function. -// CHECK-LABEL: define{{.*}}@_Z4fun5v() +// CHECK-LABEL: define{{.*}}@fun5() // CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr // CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8 // CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to i8* -// CHECK: call void @_Z6launchPv(i8* %[[CAST]]) -void fun5() { +// CHECK: call void @launch(i8* %[[CAST]]) +extern "C" void fun5() { kernel_ptr = ckernel; launch((void *)kernel_ptr); } +// Check kernel handle is registered. + // CHECK-LABEL: define{{.*}}@__hip_register_globals // CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]] // CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]] // CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]] -// CHECK-NOT: call{{.*}}@__hipRegisterFunction{{.*}}@[[HDKERN]]{{.*}}@{{[0-9]*}} +// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}__device_stub +// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}kernel_decl diff --git a/clang/test/CodeGenCUDA/usual-deallocators.cu b/clang/test/CodeGenCUDA/usual-deallocators.cu --- a/clang/test/CodeGenCUDA/usual-deallocators.cu +++ b/clang/test/CodeGenCUDA/usual-deallocators.cu @@ -109,7 +109,7 @@ } // Make sure that we've generated the kernel used by A::~A. -// DEVICE-LABEL: define dso_local void @_Z1fIiEvT_ +// DEVICE-LABEL: define void @_Z1fIiEvT_ // Make sure we've picked deallocator for the correct side of compilation.