# Patch summary (free-text preamble placed ahead of the first "diff --git" header).
#
# 1. clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp, getGPUNumThreads:
#    - Before: looks up or declares "__ockl_get_local_size" with type
#      Int64Ty(Int32Ty), calls it with the constant Bld.getInt32(0), and
#      truncates the result to Int32Ty.
#    - After: looks up or declares "__kmpc_amdgcn_gpu_num_threads" with type
#      Int32Ty() (no parameters) and returns the call result directly, so the
#      CreateTrunc is dropped. The call's value name stays "nvptx_num_threads".
#
# 2. openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h:
#    - Adds the declaration
#      EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads(void);
#
# 3. openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip:
#    - Adds the definition of __kmpc_amdgcn_gpu_num_threads, which returns
#      GetNumberOfThreadsInBlock().
#    - Adds the missing ';' after "return nullptr" in the __kmpc_impl_malloc
#      stub.
#
# NOTE(review): the patch text below appears to have had its line breaks
# collapsed. The hunk headers (-49,13 +49,12 / -15,4 +15,6 / -144,6 +144,11)
# describe multi-line hunks, so the original newlines presumably need to be
# restored before the patch can be applied; confirm against the upstream
# patch.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp @@ -49,13 +49,12 @@ llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; llvm::Module *M = &CGF.CGM.getModule(); - const char *LocSize = "__ockl_get_local_size"; + const char *LocSize = "__kmpc_amdgcn_gpu_num_threads"; llvm::Function *F = M->getFunction(LocSize); if (!F) { F = llvm::Function::Create( - llvm::FunctionType::get(CGF.Int64Ty, {CGF.Int32Ty}, false), + llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false), llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); } - return Bld.CreateTrunc( - Bld.CreateCall(F, {Bld.getInt32(0)}, "nvptx_num_threads"), CGF.Int32Ty); + return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); } diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h @@ -15,4 +15,6 @@ typedef uint64_t __kmpc_impl_lanemask_t; typedef uint32_t omp_lock_t; /* arbitrary type of the right length */ +EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads(void); + #endif diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip @@ -144,6 +144,11 @@ return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); } +EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads(void) +{ + return GetNumberOfThreadsInBlock(); +} + // Stub implementations -DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr } +DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr; } DEVICE void 
__kmpc_impl_free(void *) {}