Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu @@ -96,7 +96,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin( __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack, - void **SavedSharedFrame, int32_t *SavedActiveThreads, + void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads, size_t SharingDataSize, size_t SharingDefaultDataSize, int16_t IsOMPRuntimeInitialized) { @@ -117,7 +117,7 @@ __kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID]; void *&StackP = DataSharingState.StackPtr[WID]; void * volatile &FrameP = DataSharingState.FramePtr[WID]; - int32_t &ActiveT = DataSharingState.ActiveThreads[WID]; + __kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID]; DSPRINT0(DSFLAG, "Save current slot/stack values.\n"); // Save the current values. @@ -225,7 +225,7 @@ EXTERN void __kmpc_data_sharing_environment_end( __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack, - void **SavedSharedFrame, int32_t *SavedActiveThreads, + void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads, int32_t IsEntryPoint) { DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n"); @@ -260,7 +260,7 @@ // assume that threads will converge right after the call site that started // the environment. if (IsWarpMasterActiveThread()) { - int32_t &ActiveT = DataSharingState.ActiveThreads[WID]; + __kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID]; DSPRINT0(DSFLAG, "Before restoring the stack\n"); // Zero the bits in the mask. If it is still different from zero, then we Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h @@ -19,6 +19,7 @@ #define _INTERFACES_H_ #include "option.h" +#include "target_impl.h" //////////////////////////////////////////////////////////////////////////////// // OpenMP interface @@ -422,9 +423,9 @@ EXTERN void __kmpc_flush(kmp_Ident *loc); // vote -EXTERN int32_t __kmpc_warp_active_thread_mask(); +EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask(); // syncwarp -EXTERN void __kmpc_syncwarp(int32_t); +EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t); // tasks EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc, @@ -475,11 +476,13 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn, int16_t IsOMPRuntimeInitialized); EXTERN void __kmpc_kernel_end_parallel(); -EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask, +EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, + __kmpc_impl_lanemask_t Mask, bool *IsFinal, int32_t *LaneSource); EXTERN void __kmpc_kernel_end_convergent_parallel(void *buffer); -EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask, +EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, + __kmpc_impl_lanemask_t Mask, bool *IsFinal, int32_t *LaneSource, int32_t *LaneId, int32_t *NumLanes); EXTERN void __kmpc_kernel_end_convergent_simd(void *buffer); @@ -510,12 +513,13 @@ size_t InitialDataSize); EXTERN void *__kmpc_data_sharing_environment_begin( __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack, - void **SavedSharedFrame, int32_t *SavedActiveThreads, + void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads, size_t SharingDataSize, size_t SharingDefaultDataSize, int16_t IsOMPRuntimeInitialized); EXTERN void __kmpc_data_sharing_environment_end( __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack, - void **SavedSharedFrame, int32_t *SavedActiveThreads, int32_t IsEntryPoint); + void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads, + int32_t IsEntryPoint); EXTERN void * __kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID, Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu @@ -380,7 +380,8 @@ //////////////////////////////////////////////////////////////////////////////// // Support for dispatch next - INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) { + INLINE static uint64_t Shuffle(__kmpc_impl_lanemask_t active, int64_t val, + int leader) { uint32_t lo, hi; __kmpc_impl_unpack(val, lo, hi); hi = __kmpc_impl_shfl_sync(active, hi, leader); Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -107,7 +107,7 @@ __kmpc_data_sharing_slot *SlotPtr[DS_Max_Warp_Number]; void *StackPtr[DS_Max_Warp_Number]; void * volatile FramePtr[DS_Max_Warp_Number]; - int32_t ActiveThreads[DS_Max_Warp_Number]; + __kmpc_impl_lanemask_t ActiveThreads[DS_Max_Warp_Number]; }; // Additional worker slot type which is initialized with the default worker slot // size of 4*32 bytes. Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -44,13 +44,14 @@ //////////////////////////////////////////////////////////////////////////////// // support for convergent simd (team of threads in a warp only) //////////////////////////////////////////////////////////////////////////////// -EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask, +EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, + __kmpc_impl_lanemask_t Mask, bool *IsFinal, int32_t *LaneSource, int32_t *LaneId, int32_t *NumLanes) { PRINT0(LD_IO, "call to __kmpc_kernel_convergent_simd\n"); - uint32_t ConvergentMask = Mask; + __kmpc_impl_lanemask_t ConvergentMask = Mask; int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask); - uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1); + __kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1); *LaneSource += __kmpc_impl_ffs(WorkRemaining); *IsFinal = __kmpc_impl_popc(WorkRemaining) == 1; __kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt(); @@ -117,13 +118,14 @@ //////////////////////////////////////////////////////////////////////////////// // support for convergent parallelism (team of threads in a warp only) //////////////////////////////////////////////////////////////////////////////// -EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask, +EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, + __kmpc_impl_lanemask_t Mask, bool *IsFinal, int32_t *LaneSource) { PRINT0(LD_IO, "call to __kmpc_kernel_convergent_parallel\n"); - uint32_t ConvergentMask = Mask; + __kmpc_impl_lanemask_t ConvergentMask = Mask; int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask); - uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1); + __kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1); *LaneSource += __kmpc_impl_ffs(WorkRemaining); *IsFinal = __kmpc_impl_popc(WorkRemaining) == 1; __kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt(); Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu @@ -140,7 +140,7 @@ // Vote //////////////////////////////////////////////////////////////////////////////// -EXTERN int32_t __kmpc_warp_active_thread_mask() { +EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() { PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n"); return __kmpc_impl_activemask(); } @@ -149,7 +149,7 @@ // Syncwarp //////////////////////////////////////////////////////////////////////////////// -EXTERN void __kmpc_syncwarp(int32_t Mask) { +EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) { PRINT0(LD_IO, "call __kmpc_syncwarp\n"); __kmpc_impl_syncwarp(Mask); }