Index: lib/CodeGen/CGCUDANV.cpp =================================================================== --- lib/CodeGen/CGCUDANV.cpp +++ lib/CodeGen/CGCUDANV.cpp @@ -218,6 +218,7 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() || + getDeviceSideName(CGF.CurFuncDecl) + ".stub" == CGF.CurFn->getName() || CGF.CGM.getContext().getTargetInfo().getCXXABI() != CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI()); Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -1048,8 +1048,17 @@ // Keep the first result in the case of a mangling collision. const auto *ND = cast(GD.getDecl()); - auto Result = - Manglings.insert(std::make_pair(getMangledNameImpl(*this, GD, ND), GD)); + std::string MangledName = getMangledNameImpl(*this, GD, ND); + + // Postfix kernel stub names with .stub to differentiate them from kernel + // names in device binaries. This is to facilitate the debugger to find + // the correct symbols for kernels in the device binary. + if (auto *FD = dyn_cast(GD.getDecl())) + if (getLangOpts().HIP && !getLangOpts().CUDAIsDevice && + FD->hasAttr()) + MangledName = MangledName + ".stub"; + + auto Result = Manglings.insert(std::make_pair(MangledName, GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); } Index: test/CodeGenCUDA/kernel-stub-name.cu =================================================================== --- /dev/null +++ test/CodeGenCUDA/kernel-stub-name.cu @@ -0,0 +1,20 @@ +// RUN: echo "GPU binary would be here" > %t + +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ +// RUN: -fcuda-include-gpubinary %t -o - -x hip\ +// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CHECK + +#include "Inputs/cuda.h" + +template +__global__ void kernelfunc() {} + +// CHECK-LABEL: define{{.*}}@_Z8hostfuncv() +// CHECK: call void @[[STUB:_Z10kernelfuncIiEvv.stub]]() +void hostfunc(void) { kernelfunc<<<1, 1>>>(); } + +// CHECK: define{{.*}}@[[STUB]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[STUB]] + +// CHECK-LABEL: define{{.*}}@__hip_register_globals +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[STUB]]