Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -41,12 +41,16 @@ llvm::LLVMContext &Context; /// Convenience reference to the current module llvm::Module &TheModule; - /// Keeps track of kernel launch stubs emitted in this module + /// Keeps track of kernel launch stubs and handles emitted in this module struct KernelInfo { - llvm::Function *Kernel; + llvm::Function *Kernel; // stub function to help launch kernel const Decl *D; }; llvm::SmallVector EmittedKernels; + // Map a device stub function to a symbol for identifying kernel in host code. + // For CUDA, the symbol for identifying the kernel is the same as the device + // stub function. For HIP, they are different. + llvm::DenseMap KernelHandles; struct VarInfo { llvm::GlobalVariable *Var; const VarDecl *D; @@ -240,6 +244,18 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); + llvm::GlobalValue *KernelHandle = CGF.CurFn; + if (CGF.getLangOpts().HIP) { + auto Linkage = CGF.CurFn->getLinkage(); + auto *Var = new llvm::GlobalVariable( + TheModule, VoidPtrTy, /*isConstant=*/true, Linkage, + /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrTy), + CGM.getMangledName(GlobalDecl(cast(CGF.CurFuncDecl), + KernelReferenceKind::Kernel))); + Var->setAlignment(CGM.getPointerAlign().getAsAlign()); + KernelHandle = Var; + } + KernelHandles[CGF.CurFn] = KernelHandle; if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), CudaFeature::CUDA_USES_NEW_LAUNCH) || CGF.getLangOpts().HIPUseNewLaunchAPI) @@ -318,7 +334,8 @@ ShmemSize.getPointer(), Stream.getPointer()}); // Emit the call to cudaLaunch - llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy); + llvm::Value *Kernel = + CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy); CallArgList LaunchKernelArgs; LaunchKernelArgs.add(RValue::get(Kernel), cudaLaunchKernelFD->getParamDecl(0)->getType()); @@ -375,7 +392,8 @@ // Emit the call to cudaLaunch llvm::FunctionCallee cudaLaunchFn = getLaunchFn(); - llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy); + llvm::Value *Arg = + CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy); CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); CGF.EmitBranch(EndBlock); @@ -428,7 +446,7 @@ llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); llvm::Value *Args[] = { &GpuBinaryHandlePtr, - Builder.CreateBitCast(I.Kernel, VoidPtrTy), + Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy), KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), Index: clang/test/CodeGenCUDA/Inputs/cuda.h =================================================================== --- clang/test/CodeGenCUDA/Inputs/cuda.h +++ clang/test/CodeGenCUDA/Inputs/cuda.h @@ -2,19 +2,28 @@ #include +#if __HIP__ || __CUDA__ #define __constant__ __attribute__((constant)) #define __device__ __attribute__((device)) #define __global__ __attribute__((global)) #define __host__ __attribute__((host)) #define __shared__ __attribute__((shared)) #define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__))) +#else +#define __constant__ +#define __device__ +#define __global__ +#define __host__ +#define __shared__ +#define __launch_bounds__(...) +#endif struct dim3 { unsigned x, y, z; __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {} }; -#ifdef __HIP__ +#if __HIP__ || HIP_PLATFORM typedef struct hipStream *hipStream_t; typedef enum hipError {} hipError_t; int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0, Index: clang/test/CodeGenCUDA/cxx-call-kernel.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/cxx-call-kernel.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -x hip -emit-llvm-bc %s -o %t.hip.bc +// RUN: %clang_cc1 -mlink-builtin-bitcode %t.hip.bc -DHIP_PLATFORM -emit-llvm \ +// RUN: %s -o - | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK: @_Z2g1i = internal constant i8* null +#if __HIP__ +__global__ void g1(int x) {} +#else +extern void g1(int x); + +// CHECK: call i32 @hipLaunchKernel{{.*}}@_Z2g1i +void test() { + hipLaunchKernel((void*)g1, 1, 1, nullptr, 0, 0); +} + +// CHECK: __hipRegisterFunction{{.*}}@_Z2g1i +#endif Index: clang/test/CodeGenCUDA/kernel-stub-name.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-stub-name.cu +++ clang/test/CodeGenCUDA/kernel-stub-name.cu @@ -6,6 +6,12 @@ #include "Inputs/cuda.h" +// Kernel handles + +// CHECK: @[[HCKERN:ckernel]] = constant i8* null +// CHECK: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant i8* null +// CHECK: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant i8* null + extern "C" __global__ void ckernel() {} namespace ns { @@ -26,9 +32,9 @@ // Non-template kernel stub functions // CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]] -// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]] // CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]] -// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]] // CHECK-LABEL: define{{.*}}@_Z8hostfuncv() // CHECK: call void @[[CSTUB]]() @@ -45,11 +51,11 @@ // Template kernel stub functions // CHECK: define{{.*}}@[[TSTUB]] -// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]] // CHECK: declare{{.*}}@[[DSTUB]] // CHECK-LABEL: define{{.*}}@__hip_register_globals -// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]] -// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]] -// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]] Index: clang/test/CodeGenCUDA/unnamed-types.cu =================================================================== --- clang/test/CodeGenCUDA/unnamed-types.cu +++ clang/test/CodeGenCUDA/unnamed-types.cu @@ -36,4 +36,4 @@ }(p); } // HOST: @__hip_register_globals -// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0 +// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0