Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -132,8 +132,6 @@ llvm::Function *makeModuleCtorFunction() override; /// Creates module destructor function llvm::Function *makeModuleDtorFunction() override; - /// Construct and return the stub name of a kernel. - std::string getDeviceStubName(llvm::StringRef Name) const override; }; } @@ -219,21 +217,6 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { - // Ensure either we have different ABIs between host and device compilations, - // says host compilation following MSVC ABI but device compilation follows - // Itanium C++ ABI or, if they follow the same ABI, kernel names after - // mangling should be the same after name stubbing. The later checking is - // very important as the device kernel name being mangled in host-compilation - // is used to resolve the device binaries to be executed. Inconsistent naming - // result in undefined behavior. Even though we cannot check that naming - // directly between host- and device-compilations, the host- and - // device-mangling in host compilation could help catching certain ones. - assert((CGF.CGM.getContext().getAuxTargetInfo() && - (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() != - CGF.CGM.getContext().getTargetInfo().getCXXABI())) || - getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) == - CGF.CurFn->getName()); - EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), CudaFeature::CUDA_USES_NEW_LAUNCH) || @@ -797,12 +780,6 @@ return ModuleDtorFunc; } -std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const { - if (!CGM.getLangOpts().HIP) - return Name; - return (Name + ".stub").str(); -} - CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { return new CGNVCUDARuntime(CGM); } Index: clang/lib/CodeGen/CGCUDARuntime.h =================================================================== --- clang/lib/CodeGen/CGCUDARuntime.h +++ clang/lib/CodeGen/CGCUDARuntime.h @@ -25,6 +25,7 @@ namespace clang { class CUDAKernelCallExpr; +class FunctionDecl; class VarDecl; namespace CodeGen { @@ -65,9 +66,6 @@ /// Returns a module cleanup function or nullptr if it's not needed. /// Must be called after ModuleCtorFunction virtual llvm::Function *makeModuleDtorFunction() = 0; - - /// Construct and return the stub name of a kernel. - virtual std::string getDeviceStubName(llvm::StringRef Name) const = 0; }; /// Creates an instance of a CUDA runtime class. Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -1090,13 +1090,25 @@ // Keep the first result in the case of a mangling collision. const auto *ND = cast(GD.getDecl()); - std::string MangledName = getMangledNameImpl(*this, GD, ND); + std::string MangledName; // Adjust kernel stub mangling as we may need to be able to differentiate // them from the kernel itself (e.g., for HIP). - if (auto *FD = dyn_cast(GD.getDecl())) - if (!getLangOpts().CUDAIsDevice && FD->hasAttr()) - MangledName = getCUDARuntime().getDeviceStubName(MangledName); + if (ND && ND->hasAttr() && !getLangOpts().CUDAIsDevice && + getLangOpts().HIP) { + auto *FD = const_cast((ND)); + if (auto *TD = cast(FD)->getPrimaryTemplate()) + FD = TD->getTemplatedDecl(); + auto OldDeclName = FD->getDeclName(); + auto NewNameStr = std::string("__device_stub__") + OldDeclName.getAsString(); + auto *NewId = &Context.Idents.get(NewNameStr); + auto NewDeclName = DeclarationName(NewId); + FD->setDeclName(NewDeclName); + MangledName = getMangledNameImpl(*this, GD, ND); + FD->setDeclName(OldDeclName); + } else { + MangledName = getMangledNameImpl(*this, GD, ND); + } auto Result = Manglings.insert(std::make_pair(MangledName, GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); Index: clang/test/CodeGenCUDA/kernel-stub-name.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-stub-name.cu +++ clang/test/CodeGenCUDA/kernel-stub-name.cu @@ -6,15 +6,44 @@ #include "Inputs/cuda.h" +extern "C" __global__ void ckernel() {} + +namespace ns { +__global__ void nskernel() {} +} // namespace ns + template __global__ void kernelfunc() {} +// Device side kernel names + +// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00" +// CHECK: @[[NSKERN:[0-9]*]] = {{.*}} c"_ZN2ns8nskernelEv\00" +// CHECK: @[[TKERN:[0-9]*]] = {{.*}} c"_Z10kernelfuncIiEvv\00" + +// Non-template kernel stub functions + +// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]] +// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]] + // CHECK-LABEL: define{{.*}}@_Z8hostfuncv() -// CHECK: call void @[[STUB:_Z10kernelfuncIiEvv.stub]]() -void hostfunc(void) { kernelfunc<<<1, 1>>>(); } +// CHECK: call void @[[CSTUB]]() +// CHECK: call void @[[NSSTUB]]() +// CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]() +void hostfunc(void) { + ckernel<<<1, 1>>>(); + ns::nskernel<<<1, 1>>>(); + kernelfunc<<<1, 1>>>(); +} + +// Template kernel stub functions -// CHECK: define{{.*}}@[[STUB]] -// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[STUB]] +// CHECK: define{{.*}}@[[TSTUB]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]] // CHECK-LABEL: define{{.*}}@__hip_register_globals -// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[STUB]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]] Index: clang/test/CodeGenCUDA/unnamed-types.cu =================================================================== --- clang/test/CodeGenCUDA/unnamed-types.cu +++ clang/test/CodeGenCUDA/unnamed-types.cu @@ -36,4 +36,4 @@ }(p); } // HOST: @__hip_register_globals -// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0 +// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0