diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip @@ -122,12 +122,12 @@ } } // namespace -EXTERN int GetNumberOfBlocksInKernel() { +EXTERN int __kmpc_get_hardware_num_blocks() { return get_grid_dim(__builtin_amdgcn_grid_size_x(), __builtin_amdgcn_workgroup_size_x()); } -EXTERN int GetNumberOfThreadsInBlock() { +EXTERN int __kmpc_get_hardware_num_threads_in_block() { return get_workgroup_dim(__builtin_amdgcn_workgroup_id_x(), __builtin_amdgcn_grid_size_x(), __builtin_amdgcn_workgroup_size_x()); @@ -140,7 +140,7 @@ } EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() { - return GetNumberOfThreadsInBlock(); + return __kmpc_get_hardware_num_threads_in_block(); } // Atomics diff --git a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu --- a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu @@ -61,7 +61,7 @@ EXTERN int omp_get_thread_limit(void) { if (__kmpc_is_spmd_exec_mode()) - return GetNumberOfThreadsInBlock(); + return __kmpc_get_hardware_num_threads_in_block(); int rc = threadLimit; PRINT(LD_IO, "call omp_get_thread_limit() return %d\n", rc); return rc; @@ -196,7 +196,7 @@ EXTERN int omp_get_team_size(int level) { if (__kmpc_is_spmd_exec_mode()) - return level == 1 ? GetNumberOfThreadsInBlock() : 1; + return level == 1 ? __kmpc_get_hardware_num_threads_in_block() : 1; int rc = -1; unsigned parLevel = parallelLevel[GetWarpId()]; // If level is 0 or all parallel regions are not active - return 1. diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu --- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu @@ -94,7 +94,9 @@ if (GetLaneId() == 0) { parallelLevel[GetWarpId()] = - 1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0); + 1 + (__kmpc_get_hardware_num_threads_in_block() > 1 + ? OMP_ACTIVE_PARALLEL_LEVEL + : 0); } __kmpc_data_sharing_init_stack(); diff --git a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu --- a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu @@ -199,7 +199,7 @@ __kmpc_is_spmd_exec_mode() ? GetNumberOfOmpThreads(/*isSPMDExecutionMode=*/true) : /*Master thread only*/ 1; uint32_t TeamId = GetBlockIdInKernel(); - uint32_t NumTeams = GetNumberOfBlocksInKernel(); + uint32_t NumTeams = __kmpc_get_hardware_num_blocks(); static unsigned SHARED(Bound); static unsigned SHARED(ChunkTeamCount); diff --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu --- a/openmp/libomptarget/deviceRTLs/common/src/support.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu @@ -53,7 +53,7 @@ // // Called in Generic Execution Mode only. int GetMasterThreadID() { - return (GetNumberOfThreadsInBlock() - 1) & ~(WARPSIZE - 1); + return (__kmpc_get_hardware_num_threads_in_block() - 1) & ~(WARPSIZE - 1); } // The last warp is reserved for the master; other warps are workers. @@ -109,7 +109,7 @@ if (Level != OMP_ACTIVE_PARALLEL_LEVEL + 1) { rc = 1; } else if (isSPMDExecutionMode) { - rc = GetNumberOfThreadsInBlock(); + rc = __kmpc_get_hardware_num_threads_in_block(); } else { rc = threadsInTeam; } @@ -127,7 +127,7 @@ int GetNumberOfOmpTeams() { // omp_num_teams - return GetNumberOfBlocksInKernel(); // assume 1 block per team + return __kmpc_get_hardware_num_blocks(); // assume 1 block per team } //////////////////////////////////////////////////////////////////////////////// @@ -169,7 +169,7 @@ int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) { if (!isSPMDExecutionMode) return GetNumberOfWorkersInTeam(); - return GetNumberOfThreadsInBlock(); + return __kmpc_get_hardware_num_threads_in_block(); } int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) { diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu @@ -96,10 +96,12 @@ return __nvvm_read_ptx_sreg_tid_x(); } EXTERN int GetBlockIdInKernel() { return __nvvm_read_ptx_sreg_ctaid_x(); } -EXTERN int GetNumberOfBlocksInKernel() { +EXTERN int __kmpc_get_hardware_num_blocks() { return __nvvm_read_ptx_sreg_nctaid_x(); } -EXTERN int GetNumberOfThreadsInBlock() { return __nvvm_read_ptx_sreg_ntid_x(); } +EXTERN int __kmpc_get_hardware_num_threads_in_block() { + return __nvvm_read_ptx_sreg_ntid_x(); +} EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; } diff --git a/openmp/libomptarget/deviceRTLs/target_interface.h b/openmp/libomptarget/deviceRTLs/target_interface.h --- a/openmp/libomptarget/deviceRTLs/target_interface.h +++ b/openmp/libomptarget/deviceRTLs/target_interface.h @@ -18,8 +18,8 @@ // Calls to the NVPTX layer (assuming 1D layout) EXTERN int __kmpc_get_hardware_thread_id_in_block(); EXTERN int GetBlockIdInKernel(); -EXTERN int GetNumberOfBlocksInKernel(); -EXTERN int GetNumberOfThreadsInBlock(); +EXTERN int __kmpc_get_hardware_num_blocks(); +EXTERN int __kmpc_get_hardware_num_threads_in_block(); EXTERN unsigned GetWarpId(); EXTERN unsigned GetWarpSize(); EXTERN unsigned GetLaneId();