Index: include/clang/AST/Mangle.h =================================================================== --- include/clang/AST/Mangle.h +++ include/clang/AST/Mangle.h @@ -147,6 +147,12 @@ /// across translation units so it can be used with LTO. virtual void mangleTypeName(QualType T, raw_ostream &) = 0; + /// Whether should add prefix to HIP device stub function. + virtual bool shouldPrefixDeviceStub() const { return false; } + + /// Set whether should add prefix to HIP device stub function. + virtual void setPrefixDeviceStub(bool Prefix) {} + /// @} }; Index: lib/AST/ItaniumMangle.cpp =================================================================== --- lib/AST/ItaniumMangle.cpp +++ lib/AST/ItaniumMangle.cpp @@ -122,6 +122,9 @@ llvm::DenseMap Discriminator; llvm::DenseMap Uniquifier; + // Add prefix to HIP device stub function. + bool PrefixDevStub = true; + public: explicit ItaniumMangleContextImpl(ASTContext &Context, DiagnosticsEngine &Diags) @@ -203,6 +206,11 @@ disc = discriminator-2; return true; } + virtual bool shouldPrefixDeviceStub() const override { return PrefixDevStub; } + virtual void setPrefixDeviceStub(bool Prefix) override { + PrefixDevStub = Prefix; + } + /// @} }; @@ -483,6 +491,7 @@ const AbiTagList *AdditionalAbiTags); void mangleSourceName(const IdentifierInfo *II); void mangleRegCallName(const IdentifierInfo *II); + void mangleDeviceStubName(const IdentifierInfo *II); void mangleSourceNameWithAbiTags( const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr); void mangleLocalName(const Decl *D, @@ -1302,7 +1311,12 @@ bool IsRegCall = FD && FD->getType()->castAs()->getCallConv() == clang::CC_X86RegCall; - if (IsRegCall) + bool IsDeviceStub = FD && getASTContext().getLangOpts().HIP && + !getASTContext().getLangOpts().CUDAIsDevice && + FD->hasAttr(); + if (IsDeviceStub && Context.shouldPrefixDeviceStub()) + mangleDeviceStubName(II); + else if (IsRegCall) mangleRegCallName(II); else mangleSourceName(II); @@ -1491,6 +1505,14 @@ << II->getName(); } +void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) { + // ::= __device_stub__ + // ::= [n] + // ::= + Out << II->getLength() + sizeof("__device_stub__") - 1 << "__device_stub__" + << II->getName(); +} + void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) { // ::= // ::= [n] Index: lib/CodeGen/CGCUDANV.cpp =================================================================== --- lib/CodeGen/CGCUDANV.cpp +++ lib/CodeGen/CGCUDANV.cpp @@ -166,6 +166,7 @@ CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy)); VoidPtrTy = cast(Types.ConvertType(Ctx.VoidPtrTy)); VoidPtrPtrTy = VoidPtrTy->getPointerTo(); + DeviceMC->setPrefixDeviceStub(false); } llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const { @@ -231,6 +232,7 @@ assert((CGF.CGM.getContext().getAuxTargetInfo() && (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() != CGF.CGM.getContext().getTargetInfo().getCXXABI())) || + CGF.getLangOpts().HIP || getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) == CGF.CurFn->getName()); @@ -798,9 +800,9 @@ } std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const { - if (!CGM.getLangOpts().HIP) + if (!CGM.getLangOpts().HIP || Name.startswith("_Z")) return Name; - return (Name + ".stub").str(); + return ("__device_stub__" + Name).str(); } CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { Index: test/CodeGenCUDA/kernel-stub-name.cu =================================================================== --- test/CodeGenCUDA/kernel-stub-name.cu +++ test/CodeGenCUDA/kernel-stub-name.cu @@ -6,15 +6,44 @@ #include "Inputs/cuda.h" +extern "C" __global__ void ckernel() {} + +namespace ns { +__global__ void nskernel() {} +} // namespace ns + template __global__ void kernelfunc() {} +// Device side kernel names + +// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00" +// CHECK: @[[NSKERN:[0-9]*]] = {{.*}} c"_ZN2ns8nskernelEv\00" +// CHECK: @[[TKERN:[0-9]*]] = {{.*}} c"_Z10kernelfuncIiEvv\00" + +// Non-template kernel stub functions + +// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]] +// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]] + // CHECK-LABEL: define{{.*}}@_Z8hostfuncv() -// CHECK: call void @[[STUB:_Z10kernelfuncIiEvv.stub]]() -void hostfunc(void) { kernelfunc<<<1, 1>>>(); } +// CHECK: call void @[[CSTUB]]() +// CHECK: call void @[[NSSTUB]]() +// CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]() +void hostfunc(void) { + ckernel<<<1, 1>>>(); + ns::nskernel<<<1, 1>>>(); + kernelfunc<<<1, 1>>>(); +} + +// Template kernel stub functions -// CHECK: define{{.*}}@[[STUB]] -// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[STUB]] +// CHECK: define{{.*}}@[[TSTUB]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]] // CHECK-LABEL: define{{.*}}@__hip_register_globals -// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[STUB]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]]