Index: clang/include/clang/Basic/Specifiers.h =================================================================== --- clang/include/clang/Basic/Specifiers.h +++ clang/include/clang/Basic/Specifiers.h @@ -263,24 +263,25 @@ /// CallingConv - Specifies the calling convention that a function uses. enum CallingConv { - CC_C, // __attribute__((cdecl)) - CC_X86StdCall, // __attribute__((stdcall)) - CC_X86FastCall, // __attribute__((fastcall)) - CC_X86ThisCall, // __attribute__((thiscall)) - CC_X86VectorCall, // __attribute__((vectorcall)) - CC_X86Pascal, // __attribute__((pascal)) - CC_Win64, // __attribute__((ms_abi)) - CC_X86_64SysV, // __attribute__((sysv_abi)) - CC_X86RegCall, // __attribute__((regcall)) - CC_AAPCS, // __attribute__((pcs("aapcs"))) - CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) - CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) - CC_SpirFunction, // default for OpenCL functions on SPIR target - CC_OpenCLKernel, // inferred for OpenCL kernels - CC_Swift, // __attribute__((swiftcall)) - CC_PreserveMost, // __attribute__((preserve_most)) - CC_PreserveAll, // __attribute__((preserve_all)) + CC_C, // __attribute__((cdecl)) + CC_X86StdCall, // __attribute__((stdcall)) + CC_X86FastCall, // __attribute__((fastcall)) + CC_X86ThisCall, // __attribute__((thiscall)) + CC_X86VectorCall, // __attribute__((vectorcall)) + CC_X86Pascal, // __attribute__((pascal)) + CC_Win64, // __attribute__((ms_abi)) + CC_X86_64SysV, // __attribute__((sysv_abi)) + CC_X86RegCall, // __attribute__((regcall)) + CC_AAPCS, // __attribute__((pcs("aapcs"))) + CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) + CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) + CC_SpirFunction, // default for OpenCL functions on SPIR target + CC_OpenCLKernel, // inferred for OpenCL kernels + CC_Swift, // __attribute__((swiftcall)) + CC_PreserveMost, // __attribute__((preserve_most)) + CC_PreserveAll, // __attribute__((preserve_all)) CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) + CC_DeviceStub, // inferred for HIP device stub }; /// Checks whether the given calling convention supports variadic @@ -296,6 +297,7 @@ case CC_SpirFunction: case CC_OpenCLKernel: case CC_Swift: + case CC_DeviceStub: return false; default: return true; Index: clang/lib/AST/ItaniumMangle.cpp =================================================================== --- clang/lib/AST/ItaniumMangle.cpp +++ clang/lib/AST/ItaniumMangle.cpp @@ -483,6 +483,7 @@ const AbiTagList *AdditionalAbiTags); void mangleSourceName(const IdentifierInfo *II); void mangleRegCallName(const IdentifierInfo *II); + void mangleDeviceStubName(const IdentifierInfo *II); void mangleSourceNameWithAbiTags( const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr); void mangleLocalName(const Decl *D, @@ -1302,7 +1303,12 @@ bool IsRegCall = FD && FD->getType()->castAs()->getCallConv() == clang::CC_X86RegCall; - if (IsRegCall) + bool IsDeviceStub = + FD && FD->getType()->castAs()->getCallConv() == + clang::CC_DeviceStub; + if (IsDeviceStub) + mangleDeviceStubName(II); + else if (IsRegCall) mangleRegCallName(II); else mangleSourceName(II); @@ -1491,6 +1497,14 @@ << II->getName(); } +void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) { + // ::= __device_stub__ + // ::= [n] + // ::= + Out << II->getLength() + sizeof("__device_stub__") - 1 << "__device_stub__" + << II->getName(); +} + void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) { // ::= // ::= [n] @@ -2734,6 +2748,7 @@ case CC_OpenCLKernel: case CC_PreserveMost: case CC_PreserveAll: + case CC_DeviceStub: // FIXME: we should be mangling all of the above. return ""; Index: clang/lib/AST/Type.cpp =================================================================== --- clang/lib/AST/Type.cpp +++ clang/lib/AST/Type.cpp @@ -2947,6 +2947,8 @@ case CC_Swift: return "swiftcall"; case CC_PreserveMost: return "preserve_most"; case CC_PreserveAll: return "preserve_all"; + case CC_DeviceStub: + return "device_stub"; } llvm_unreachable("Invalid calling convention."); Index: clang/lib/AST/TypePrinter.cpp =================================================================== --- clang/lib/AST/TypePrinter.cpp +++ clang/lib/AST/TypePrinter.cpp @@ -893,6 +893,7 @@ break; case CC_SpirFunction: case CC_OpenCLKernel: + case CC_DeviceStub: // Do nothing. These CCs are not available as attributes. break; case CC_Swift: Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -313,6 +313,7 @@ case CC_X86Pascal: case CC_IntelOclBicc: case CC_OpenCLKernel: + case CC_DeviceStub: return CCCR_OK; default: return CCCR_Warning; @@ -659,6 +660,7 @@ case CC_PreserveAll: case CC_X86RegCall: case CC_OpenCLKernel: + case CC_DeviceStub: return CCCR_OK; default: return CCCR_Warning; @@ -733,6 +735,7 @@ case CC_Swift: case CC_X86RegCall: case CC_OpenCLKernel: + case CC_DeviceStub: return CCCR_OK; default: return CCCR_Warning; Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -231,6 +231,7 @@ assert((CGF.CGM.getContext().getAuxTargetInfo() && (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() != CGF.CGM.getContext().getTargetInfo().getCXXABI())) || + CGF.getLangOpts().HIP || getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) == CGF.CurFn->getName()); @@ -798,9 +799,9 @@ } std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const { - if (!CGM.getLangOpts().HIP) + if (!CGM.getLangOpts().HIP || Name.startswith("_Z")) return Name; - return (Name + ".stub").str(); + return ("__device_stub__" + Name).str(); } CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { Index: clang/lib/CodeGen/CGDebugInfo.cpp =================================================================== --- clang/lib/CodeGen/CGDebugInfo.cpp +++ clang/lib/CodeGen/CGDebugInfo.cpp @@ -1149,6 +1149,7 @@ static unsigned getDwarfCC(CallingConv CC) { switch (CC) { + case CC_DeviceStub: case CC_C: // Avoid emitting DW_AT_calling_convention if the C convention was used. return 0; Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -1094,8 +1094,18 @@ // Adjust kernel stub mangling as we may need to be able to differentiate // them from the kernel itself (e.g., for HIP). if (auto *FD = dyn_cast(GD.getDecl())) - if (!getLangOpts().CUDAIsDevice && FD->hasAttr()) + if (!getLangOpts().CUDAIsDevice && FD->hasAttr()) { + if (auto *TD = cast(FD)->getPrimaryTemplate()) + FD = TD->getTemplatedDecl(); + auto OldQT = FD->getType(); + auto *OldFT = OldQT->getAs(); + auto *NewFT = getContext().adjustFunctionType( + OldFT, OldFT->getExtInfo().withCallingConv(CC_DeviceStub)); + const_cast(FD)->setType(QualType(NewFT, 0)); + MangledName = getMangledNameImpl(*this, GD, ND); MangledName = getCUDARuntime().getDeviceStubName(MangledName); + const_cast(FD)->setType(OldQT); + } auto Result = Manglings.insert(std::make_pair(MangledName, GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); Index: clang/test/CodeGenCUDA/kernel-stub-name.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-stub-name.cu +++ clang/test/CodeGenCUDA/kernel-stub-name.cu @@ -6,15 +6,50 @@ #include "Inputs/cuda.h" +extern "C" __global__ void ckernel() {} + +namespace ns { +__global__ void nskernel() {} +} // namespace ns + template __global__ void kernelfunc() {} +__global__ void kernel_decl(); + +// Device side kernel names + +// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00" +// CHECK: @[[NSKERN:[0-9]*]] = {{.*}} c"_ZN2ns8nskernelEv\00" +// CHECK: @[[TKERN:[0-9]*]] = {{.*}} c"_Z10kernelfuncIiEvv\00" + +// Non-template kernel stub functions + +// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]] +// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]] + // CHECK-LABEL: define{{.*}}@_Z8hostfuncv() -// CHECK: call void @[[STUB:_Z10kernelfuncIiEvv.stub]]() -void hostfunc(void) { kernelfunc<<<1, 1>>>(); } +// CHECK: call void @[[CSTUB]]() +// CHECK: call void @[[NSSTUB]]() +// CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]() +// CHECK: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]() +void hostfunc(void) { + ckernel<<<1, 1>>>(); + ns::nskernel<<<1, 1>>>(); + kernelfunc<<<1, 1>>>(); + kernel_decl<<<1, 1>>>(); +} + +// Template kernel stub functions + +// CHECK: define{{.*}}@[[TSTUB]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]] -// CHECK: define{{.*}}@[[STUB]] -// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[STUB]] +// CHECK: declare{{.*}}@[[DSTUB]] // CHECK-LABEL: define{{.*}}@__hip_register_globals -// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[STUB]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]]