diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu @@ -11,13 +11,9 @@ //===----------------------------------------------------------------------===// #include "omptarget-nvptx.h" #include "target_impl.h" +#include "support.h" #include -// Warp ID in the CUDA block -INLINE static unsigned getWarpId() { return threadIdx.x / WARPSIZE; } -// Lane ID in the CUDA warp. -INLINE static unsigned getLaneId() { return threadIdx.x % WARPSIZE; } - // Return true if this is the first active thread in the warp. INLINE static bool IsWarpMasterActiveThread() { unsigned long long Mask = __kmpc_impl_activemask(); @@ -358,7 +354,7 @@ // This function initializes the stack pointer with the pointer to the // statically allocated shared memory slots. The size of a shared memory // slot is pre-determined to be 256 bytes. - if (threadIdx.x == 0) + if (GetThreadIdInBlock() == 0) data_sharing_init_stack_common(); __threadfence_block(); diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h --- a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h @@ -128,12 +128,12 @@ #if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING #include -#include "target_impl.h" +#include "support.h" template NOINLINE static void log(const char *fmt, Arguments... parameters) { - printf(fmt, (int)blockIdx.x, (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE), - (int)(threadIdx.x & 0x1F), parameters...); + printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(), + (int)(GetWarpId()), (int)(GetLaneId()), parameters...); } #endif @@ -144,9 +144,8 @@ NOINLINE static void check(bool cond, const char *fmt, Arguments... parameters) { if (!cond) - printf(fmt, (int)blockIdx.x, (int)threadIdx.x, - (int)(threadIdx.x / WARPSIZE), (int)(threadIdx.x & 0x1F), - parameters...); + printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(), + (int)(GetWarpId()), (int)(GetLaneId()), parameters...); assert(cond); } diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -364,7 +364,7 @@ for (;;) { now = clock(); clock_t cycles = now > start ? now - start : now + (0xffffffff - start); - if (cycles >= __OMP_SPIN * blockIdx.x) { + if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) { break; } } diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h --- a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h @@ -104,9 +104,9 @@ INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; } -INLINE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; } +INLINE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; } -INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); } +INLINE unsigned GetLaneId() { return GetThreadIdInBlock() % WARPSIZE; } //////////////////////////////////////////////////////////////////////////////// // @@ -122,7 +122,9 @@ // If NumThreads is 1024, master id is 992. // // Called in Generic Execution Mode only. -INLINE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); } +INLINE int GetMasterThreadID() { + return (GetNumberOfThreadsInBlock() - 1) & ~(WARPSIZE - 1); +} // The last warp is reserved for the master; other warps are workers. // Called in Generic Execution Mode only.