diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu @@ -18,20 +18,20 @@ // Execution Parameters //////////////////////////////////////////////////////////////////////////////// -INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) { +DEVICE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) { execution_param = EMode; execution_param |= RMode; } -INLINE bool isGenericMode() { return (execution_param & ModeMask) == Generic; } +DEVICE bool isGenericMode() { return (execution_param & ModeMask) == Generic; } -INLINE bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; } +DEVICE bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; } -INLINE bool isRuntimeUninitialized() { +DEVICE bool isRuntimeUninitialized() { return (execution_param & RuntimeMask) == RuntimeUninitialized; } -INLINE bool isRuntimeInitialized() { +DEVICE bool isRuntimeInitialized() { return (execution_param & RuntimeMask) == RuntimeInitialized; } @@ -39,7 +39,7 @@ // Execution Modes based on location parameter fields //////////////////////////////////////////////////////////////////////////////// -INLINE bool checkSPMDMode(kmp_Ident *loc) { +DEVICE bool checkSPMDMode(kmp_Ident *loc) { if (!loc) return isSPMDMode(); @@ -57,11 +57,11 @@ return isSPMDMode(); } -INLINE bool checkGenericMode(kmp_Ident *loc) { +DEVICE bool checkGenericMode(kmp_Ident *loc) { return !checkSPMDMode(loc); } -INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) { +DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc) { if (!loc) return isRuntimeUninitialized(); @@ -84,7 +84,7 @@ return isRuntimeUninitialized(); } -INLINE bool checkRuntimeInitialized(kmp_Ident *loc) { +DEVICE bool checkRuntimeInitialized(kmp_Ident *loc) { return !checkRuntimeUninitialized(loc); } @@ -98,17 +98,17 @@ // //////////////////////////////////////////////////////////////////////////////// -INLINE int GetThreadIdInBlock() { return threadIdx.x; } +DEVICE int GetThreadIdInBlock() { return threadIdx.x; } -INLINE int GetBlockIdInKernel() { return blockIdx.x; } +DEVICE int GetBlockIdInKernel() { return blockIdx.x; } -INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; } +DEVICE int GetNumberOfBlocksInKernel() { return gridDim.x; } -INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; } +DEVICE int GetNumberOfThreadsInBlock() { return blockDim.x; } -INLINE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; } +DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; } -INLINE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); } +DEVICE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); } //////////////////////////////////////////////////////////////////////////////// // @@ -124,13 +124,13 @@ // If NumThreads is 1024, master id is 992. // // Called in Generic Execution Mode only. -INLINE int GetMasterThreadID() { +DEVICE int GetMasterThreadID() { return (GetNumberOfThreadsInBlock() - 1) & ~(WARPSIZE - 1); } // The last warp is reserved for the master; other warps are workers. // Called in Generic Execution Mode only. -INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); } +DEVICE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); } //////////////////////////////////////////////////////////////////////////////// // get thread id in team @@ -139,7 +139,7 @@ // or a serial region by the master. If the master (whose CUDA thread // id is GetMasterThreadID()) calls this routine, we return 0 because // it is a shadow for the first worker. -INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) { +DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) { // Implemented using control flow (predication) instead of with a modulo // operation. int tid = GetThreadIdInBlock(); @@ -155,7 +155,7 @@ // //////////////////////////////////////////////////////////////////////////////// -INLINE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) { +DEVICE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) { // omp_thread_num int rc; if ((parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1)) > 1) { @@ -171,7 +171,7 @@ return rc; } -INLINE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) { +DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) { // omp_num_threads int rc; int Level = parallelLevel[GetWarpId()]; @@ -189,12 +189,12 @@ //////////////////////////////////////////////////////////////////////////////// // Team id linked to OpenMP -INLINE int GetOmpTeamId() { +DEVICE int GetOmpTeamId() { // omp_team_num return GetBlockIdInKernel(); // assume 1 block per team } -INLINE int GetNumberOfOmpTeams() { +DEVICE int GetNumberOfOmpTeams() { // omp_num_teams return GetNumberOfBlocksInKernel(); // assume 1 block per team } @@ -202,12 +202,12 @@ //////////////////////////////////////////////////////////////////////////////// // Masters -INLINE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); } +DEVICE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); } //////////////////////////////////////////////////////////////////////////////// // Parallel level -INLINE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { +DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { __kmpc_impl_syncwarp(Mask); __kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt(); unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt); @@ -219,7 +219,7 @@ __kmpc_impl_syncwarp(Mask); } -INLINE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { +DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) { __kmpc_impl_syncwarp(Mask); __kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt(); unsigned Rank = __kmpc_impl_popc(Mask & LaneMaskLt); @@ -235,13 +235,13 @@ // get OpenMP number of procs // Get the number of processors in the device. -INLINE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) { +DEVICE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) { if (!isSPMDExecutionMode) return GetNumberOfWorkersInTeam(); return GetNumberOfThreadsInBlock(); } -INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) { +DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) { return GetNumberOfProcsInDevice(isSPMDExecutionMode); } @@ -249,7 +249,7 @@ // Memory //////////////////////////////////////////////////////////////////////////////// -INLINE unsigned long PadBytes(unsigned long size, +DEVICE unsigned long PadBytes(unsigned long size, unsigned long alignment) // must be a power of 2 { // compute the necessary padding to satisfy alignment constraint @@ -258,7 +258,7 @@ return (~(unsigned long)size + 1) & (alignment - 1); } -INLINE void *SafeMalloc(size_t size, const char *msg) // check if success +DEVICE void *SafeMalloc(size_t size, const char *msg) // check if success { void *ptr = malloc(size); PRINT(LD_MEM, "malloc data of size %llu for %s: 0x%llx\n", @@ -266,7 +266,7 @@ return ptr; } -INLINE void *SafeFree(void *ptr, const char *msg) { +DEVICE void *SafeFree(void *ptr, const char *msg) { PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg); free(ptr); return NULL; @@ -276,14 +276,14 @@ // Teams Reduction Scratchpad Helpers //////////////////////////////////////////////////////////////////////////////// -INLINE unsigned int *GetTeamsReductionTimestamp() { +DEVICE unsigned int *GetTeamsReductionTimestamp() { return static_cast(ReductionScratchpadPtr); } -INLINE char *GetTeamsReductionScratchpad() { +DEVICE char *GetTeamsReductionScratchpad() { return static_cast(ReductionScratchpadPtr) + 256; } -INLINE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr) { +DEVICE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr) { ReductionScratchpadPtr = ScratchpadPtr; }