diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -55,6 +55,7 @@ src/omptarget-nvptx.cu src/parallel.cu src/reduction.cu + src/support.cu src/sync.cu src/task.cu ) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -385,6 +385,5 @@ //////////////////////////////////////////////////////////////////////////////// #include "omptarget-nvptxi.h" -#include "supporti.h" #endif diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h --- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h @@ -10,7 +10,12 @@ // //===----------------------------------------------------------------------===// +#ifndef _OMPTARGET_SUPPORT_H_ +#define _OMPTARGET_SUPPORT_H_ + +#include "interface.h" #include "target_impl.h" + //////////////////////////////////////////////////////////////////////////////// // Execution Parameters //////////////////////////////////////////////////////////////////////////////// @@ -26,58 +31,70 @@ RuntimeMask = 0x02u, }; -INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode); -INLINE bool isGenericMode(); -INLINE bool isSPMDMode(); -INLINE bool isRuntimeUninitialized(); -INLINE bool isRuntimeInitialized(); +EXTERN void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode); +EXTERN bool isGenericMode(); +EXTERN bool isSPMDMode(); +EXTERN bool isRuntimeUninitialized(); +EXTERN bool isRuntimeInitialized(); + +//////////////////////////////////////////////////////////////////////////////// +// Execution Modes based on location parameter fields +//////////////////////////////////////////////////////////////////////////////// + +EXTERN bool checkSPMDMode(kmp_Ident *loc); + +EXTERN bool checkGenericMode(kmp_Ident *loc); + +EXTERN bool checkRuntimeUninitialized(kmp_Ident *loc); + +EXTERN bool checkRuntimeInitialized(kmp_Ident *loc); //////////////////////////////////////////////////////////////////////////////// // get info from machine //////////////////////////////////////////////////////////////////////////////// // get low level ids of resources -INLINE int GetThreadIdInBlock(); -INLINE int GetBlockIdInKernel(); -INLINE int GetNumberOfBlocksInKernel(); -INLINE int GetNumberOfThreadsInBlock(); -INLINE unsigned GetWarpId(); -INLINE unsigned GetLaneId(); +EXTERN int GetThreadIdInBlock(); +EXTERN int GetBlockIdInKernel(); +EXTERN int GetNumberOfBlocksInKernel(); +EXTERN int GetNumberOfThreadsInBlock(); +EXTERN unsigned GetWarpId(); +EXTERN unsigned GetLaneId(); // get global ids to locate tread/team info (constant regardless of OMP) -INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode); -INLINE int GetMasterThreadID(); -INLINE int GetNumberOfWorkersInTeam(); +EXTERN int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode); +EXTERN int GetMasterThreadID(); +EXTERN int GetNumberOfWorkersInTeam(); // get OpenMP thread and team ids -INLINE int GetOmpThreadId(int threadId, +EXTERN int GetOmpThreadId(int threadId, bool isSPMDExecutionMode); // omp_thread_num -INLINE int GetOmpTeamId(); // omp_team_num +EXTERN int GetOmpTeamId(); // omp_team_num // get OpenMP number of threads and team -INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode); // omp_num_threads -INLINE int GetNumberOfOmpTeams(); // omp_num_teams +EXTERN int GetNumberOfOmpThreads(bool isSPMDExecutionMode); // omp_num_threads +EXTERN int GetNumberOfOmpTeams(); // omp_num_teams // get OpenMP number of procs -INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode); -INLINE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode); +EXTERN int GetNumberOfProcsInTeam(bool isSPMDExecutionMode); +EXTERN int GetNumberOfProcsInDevice(bool isSPMDExecutionMode); // masters -INLINE int IsTeamMaster(int ompThreadId); +EXTERN int IsTeamMaster(int ompThreadId); // Parallel level -INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask); -INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask); +EXTERN void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask); +EXTERN void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask); //////////////////////////////////////////////////////////////////////////////// // Memory //////////////////////////////////////////////////////////////////////////////// // safe alloc and free -INLINE void *SafeMalloc(size_t size, const char *msg); // check if success -INLINE void *SafeFree(void *ptr, const char *msg); +EXTERN void *SafeMalloc(size_t size, const char *msg); // check if success +EXTERN void *SafeFree(void *ptr, const char *msg); // pad to a alignment (power of 2 only) -INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment); +EXTERN unsigned long PadBytes(unsigned long size, unsigned long alignment); #define ADD_BYTES(_addr, _bytes) \ ((void *)((char *)((void *)(_addr)) + (_bytes))) #define SUB_BYTES(_addr, _bytes) \ @@ -86,6 +103,8 @@ //////////////////////////////////////////////////////////////////////////////// // Teams Reduction Scratchpad Helpers //////////////////////////////////////////////////////////////////////////////// -INLINE unsigned int *GetTeamsReductionTimestamp(); -INLINE char *GetTeamsReductionScratchpad(); -INLINE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr); +EXTERN unsigned int *GetTeamsReductionTimestamp(); +EXTERN char *GetTeamsReductionScratchpad(); +EXTERN void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr); + +#endif diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu rename from openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h rename to openmp/libomptarget/deviceRTLs/nvptx/src/support.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu @@ -1,4 +1,4 @@ -//===--------- supporti.h - NVPTX OpenMP support functions ------- CUDA -*-===// +//===--------- support.cu - NVPTX OpenMP support functions ------- CUDA -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,26 +10,28 @@ // //===----------------------------------------------------------------------===// +#include "support.h" +#include "debug.h" +#include "omptarget-nvptx.h" + //////////////////////////////////////////////////////////////////////////////// // Execution Parameters //////////////////////////////////////////////////////////////////////////////// -#include "target_impl.h" - -INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) { +EXTERN void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) { execution_param = EMode; execution_param |= RMode; } -INLINE bool isGenericMode() { return (execution_param & ModeMask) == Generic; } +EXTERN bool isGenericMode() { return (execution_param & ModeMask) == Generic; } -INLINE bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; } +EXTERN bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; } -INLINE bool isRuntimeUninitialized() { +EXTERN bool isRuntimeUninitialized() { return (execution_param & RuntimeMask) == RuntimeUninitialized; } -INLINE bool isRuntimeInitialized() { +EXTERN bool isRuntimeInitialized() { return (execution_param & RuntimeMask) == RuntimeInitialized; } @@ -37,7 +39,7 @@ // Execution Modes based on location parameter fields //////////////////////////////////////////////////////////////////////////////// -INLINE bool checkSPMDMode(kmp_Ident *loc) { +EXTERN bool checkSPMDMode(kmp_Ident *loc) { if (!loc) return isSPMDMode(); @@ -55,11 +57,11 @@ return isSPMDMode(); } -INLINE bool checkGenericMode(kmp_Ident *loc) { +EXTERN bool checkGenericMode(kmp_Ident *loc) { return !checkSPMDMode(loc); } -INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) { +EXTERN bool checkRuntimeUninitialized(kmp_Ident *loc) { if (!loc) return isRuntimeUninitialized(); @@ -82,7 +84,7 @@ return isRuntimeUninitialized(); } -INLINE bool checkRuntimeInitialized(kmp_Ident *loc) { +EXTERN bool checkRuntimeInitialized(kmp_Ident *loc) { return !checkRuntimeUninitialized(loc); } @@ -96,17 +98,17 @@ // //////////////////////////////////////////////////////////////////////////////// -INLINE int GetThreadIdInBlock() { return threadIdx.x; } +EXTERN int GetThreadIdInBlock() { return threadIdx.x; } -INLINE int GetBlockIdInKernel() { return blockIdx.x; } +EXTERN int GetBlockIdInKernel() { return blockIdx.x; } -INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; } +EXTERN int GetNumberOfBlocksInKernel() { return gridDim.x; } -INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; } +EXTERN int GetNumberOfThreadsInBlock() { return blockDim.x; } -INLINE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; } +EXTERN unsigned GetWarpId() { return threadIdx.x / WARPSIZE; } -INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); } +EXTERN unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); } //////////////////////////////////////////////////////////////////////////////// // @@ -114,20 +116,6 @@ // //////////////////////////////////////////////////////////////////////////////// -// The master thread id is the first thread (lane) of the last warp. -// Thread id is 0 indexed. -// E.g: If NumThreads is 33, master id is 32. -// If NumThreads is 64, master id is 32. -// If NumThreads is 97, master id is 96. -// If NumThreads is 1024, master id is 992. -// -// Called in Generic Execution Mode only. -INLINE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); } - -// The last warp is reserved for the master; other warps are workers. -// Called in Generic Execution Mode only. -INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); } - //////////////////////////////////////////////////////////////////////////////// // get thread id in team @@ -135,7 +123,7 @@ // or a serial region by the master. If the master (whose CUDA thread // id is GetMasterThreadID()) calls this routine, we return 0 because // it is a shadow for the first worker. -INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) { +EXTERN int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) { // Implemented using control flow (predication) instead of with a modulo // operation. int tid = GetThreadIdInBlock(); @@ -145,13 +133,27 @@ return tid; } +// The master thread id is the first thread (lane) of the last warp. +// Thread id is 0 indexed. +// E.g: If NumThreads is 33, master id is 32. +// If NumThreads is 64, master id is 32. +// If NumThreads is 97, master id is 96. +// If NumThreads is 1024, master id is 992. +// +// Called in Generic Execution Mode only. +EXTERN int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); } + +// The last warp is reserved for the master; other warps are workers. +// Called in Generic Execution Mode only. +EXTERN int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); } + //////////////////////////////////////////////////////////////////////////////// // // OpenMP Thread Support Layer // //////////////////////////////////////////////////////////////////////////////// -INLINE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) { +EXTERN int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) { // omp_thread_num int rc; if ((parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1)) > 1) { @@ -167,7 +169,12 @@ return rc; } -INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) { +EXTERN int GetOmpTeamId() { + // omp_team_num + return GetBlockIdInKernel(); // assume 1 block per team +} + +EXTERN int GetNumberOfOmpThreads(bool isSPMDExecutionMode) { // omp_num_threads int rc; int Level = parallelLevel[GetWarpId()]; @@ -182,28 +189,34 @@ return rc; } +EXTERN int GetNumberOfOmpTeams() { + // omp_num_teams + return GetNumberOfBlocksInKernel(); // assume 1 block per team +} + //////////////////////////////////////////////////////////////////////////////// -// Team id linked to OpenMP +// get OpenMP number of procs -INLINE int GetOmpTeamId() { - // omp_team_num - return GetBlockIdInKernel(); // assume 1 block per team +EXTERN int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) { + return GetNumberOfProcsInDevice(isSPMDExecutionMode); } -INLINE int GetNumberOfOmpTeams() { - // omp_num_teams - return GetNumberOfBlocksInKernel(); // assume 1 block per team +// Get the number of processors in the device. +EXTERN int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) { + if (!isSPMDExecutionMode) + return GetNumberOfWorkersInTeam(); + return GetNumberOfThreadsInBlock(); } //////////////////////////////////////////////////////////////////////////////// // Masters -INLINE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); } +EXTERN int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); } //////////////////////////////////////////////////////////////////////////////// // Parallel level -INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { +EXTERN void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { __kmpc_impl_syncwarp(Mask); __kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt(); unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt); @@ -215,7 +228,7 @@ __kmpc_impl_syncwarp(Mask); } -INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { +EXTERN void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { __kmpc_impl_syncwarp(Mask); __kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt(); unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt); @@ -227,34 +240,11 @@ __kmpc_impl_syncwarp(Mask); } -//////////////////////////////////////////////////////////////////////////////// -// get OpenMP number of procs - -// Get the number of processors in the device. -INLINE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) { - if (!isSPMDExecutionMode) - return GetNumberOfWorkersInTeam(); - return GetNumberOfThreadsInBlock(); -} - -INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) { - return GetNumberOfProcsInDevice(isSPMDExecutionMode); -} - //////////////////////////////////////////////////////////////////////////////// // Memory //////////////////////////////////////////////////////////////////////////////// -INLINE unsigned long PadBytes(unsigned long size, - unsigned long alignment) // must be a power of 2 -{ - // compute the necessary padding to satisfy alignment constraint - ASSERT(LT_FUSSY, (alignment & (alignment - 1)) == 0, - "alignment %lu is not a power of 2\n", alignment); - return (~(unsigned long)size + 1) & (alignment - 1); -} - -INLINE void *SafeMalloc(size_t size, const char *msg) // check if success +EXTERN void *SafeMalloc(size_t size, const char *msg) // check if success { void *ptr = malloc(size); PRINT(LD_MEM, "malloc data of size %llu for %s: 0x%llx\n", @@ -262,24 +252,33 @@ return ptr; } -INLINE void *SafeFree(void *ptr, const char *msg) { +EXTERN void *SafeFree(void *ptr, const char *msg) { PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg); free(ptr); return NULL; } +EXTERN unsigned long PadBytes(unsigned long size, + unsigned long alignment) // must be a power of 2 +{ + // compute the necessary padding to satisfy alignment constraint + ASSERT(LT_FUSSY, (alignment & (alignment - 1)) == 0, + "alignment %lu is not a power of 2\n", alignment); + return (~(unsigned long)size + 1) & (alignment - 1); +} + //////////////////////////////////////////////////////////////////////////////// // Teams Reduction Scratchpad Helpers //////////////////////////////////////////////////////////////////////////////// -INLINE unsigned int *GetTeamsReductionTimestamp() { +EXTERN unsigned int *GetTeamsReductionTimestamp() { return static_cast(ReductionScratchpadPtr); } -INLINE char *GetTeamsReductionScratchpad() { +EXTERN char *GetTeamsReductionScratchpad() { return static_cast(ReductionScratchpadPtr) + 256; } -INLINE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr) { +EXTERN void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr) { ReductionScratchpadPtr = ScratchpadPtr; } diff --git a/openmp/libomptarget/deviceRTLs/nvptx/unity.cu b/openmp/libomptarget/deviceRTLs/nvptx/unity.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/unity.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/unity.cu @@ -21,5 +21,6 @@ #include "src/omptarget-nvptx.cu" #include "src/parallel.cu" #include "src/reduction.cu" +#include "src/support.cu" #include "src/sync.cu" #include "src/task.cu"