// Patch summary (explanatory preamble; not part of any hunk).
//
// Files touched, both under openmp/libomptarget/deviceRTLs/nvptx/src/:
//   * omptarget-nvptx.h
//       - hunk 1 reshuffles the "std includes" section, drops the
//         "cuda includes" comment, and adds `#include "target_impl.h"` as the
//         first entry under "local includes";
//       - hunk 2 removes `enum DATA_SHARING_SIZES`.
//   * target_impl.h
//       - adds one #include ahead of the existing ones and adds
//         `enum DATA_SHARING_SIZES` after `#include "option.h"`, in front of
//         `__kmpc_impl_unpack` (which appears only as unchanged context).
//
// The removed and the added copy of the enum list the same five enumerators
// with the same values (992, 256, WARPSIZE * DS_Slot_Size, 32, 128), so the
// patch relocates the definition without changing it.
//
// NOTE(review): in this copy the hunks' line breaks are collapsed into spaces
// and the targets of the angle-bracket #include lines are not visible, so
// which system headers are added or removed cannot be determined from this
// text. Presumably it must be regenerated from the original change before it
// can be applied -- TODO confirm.
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -15,16 +15,12 @@ #define __OMPTARGET_NVPTX_H // std includes -#include -#include - #include - -// cuda includes -#include #include +#include // local includes +#include "target_impl.h" #include "debug.h" // debug #include "interface.h" // interfaces with omp, compiler, and user #include "option.h" // choices we have @@ -86,20 +82,6 @@ extern __device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_globalArgs; -// Data sharing related quantities, need to match what is used in the compiler. -enum DATA_SHARING_SIZES { - // The maximum number of workers in a kernel. - DS_Max_Worker_Threads = 992, - // The size reserved for data in a shared memory slot. - DS_Slot_Size = 256, - // The slot size that should be reserved for a working warp. - DS_Worker_Warp_Slot_Size = WARPSIZE * DS_Slot_Size, - // The maximum number of warps in use - DS_Max_Warp_Number = 32, - // The size of the preallocated shared memory buffer per team - DS_Shared_Memory_Size = 128, -}; - // Data structure to keep in shared memory that traces the current slot, stack, // and frame pointer as well as the active threads that didn't exit the current // environment. diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -12,10 +12,25 @@ #ifndef _TARGET_IMPL_H_ #define _TARGET_IMPL_H_ +#include #include #include "option.h" +// Data sharing related quantities, need to match what is used in the compiler. +enum DATA_SHARING_SIZES { + // The maximum number of workers in a kernel. 
+ DS_Max_Worker_Threads = 992, + // The size reserved for data in a shared memory slot. + DS_Slot_Size = 256, + // The slot size that should be reserved for a working warp. + DS_Worker_Warp_Slot_Size = WARPSIZE * DS_Slot_Size, + // The maximum number of warps in use + DS_Max_Warp_Number = 32, + // The size of the preallocated shared memory buffer per team + DS_Shared_Memory_Size = 128, +}; + INLINE void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) { asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val)); }