Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -52,11 +52,8 @@ #error CUDA_VERSION macro is undefined, something wrong with cuda. #elif CUDA_VERSION >= 9000 #define __ACTIVEMASK() __activemask() -#define __SYNCWARP(Mask) __syncwarp(Mask) #else #define __ACTIVEMASK() __ballot(1) -// In Cuda < 9.0 no need to sync threads in warps. -#define __SYNCWARP(Mask) #endif // CUDA_VERSION #define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory"); Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h @@ -14,6 +14,8 @@ // Execution Parameters //////////////////////////////////////////////////////////////////////////////// +#include "target_impl.h" + INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) { execution_param = EMode; execution_param |= RMode; @@ -203,7 +205,7 @@ INLINE void IncParallelLevel(bool ActiveParallel) { unsigned Active = __ACTIVEMASK(); - __SYNCWARP(Active); + __kmpc_impl_syncwarp(Active); unsigned LaneMaskLt; asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt)); unsigned Rank = __popc(Active & LaneMaskLt); @@ -212,12 +214,12 @@ (1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0)); __threadfence(); } - __SYNCWARP(Active); + __kmpc_impl_syncwarp(Active); } INLINE void DecParallelLevel(bool ActiveParallel) { unsigned Active = __ACTIVEMASK(); - __SYNCWARP(Active); + __kmpc_impl_syncwarp(Active); unsigned LaneMaskLt; asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt)); unsigned Rank = __popc(Active & LaneMaskLt); @@ -226,7 +228,7 @@ (1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0)); __threadfence(); } - __SYNCWARP(Active); + __kmpc_impl_syncwarp(Active); } //////////////////////////////////////////////////////////////////////////////// Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -63,6 +63,12 @@ #endif // CUDA_VERSION } -INLINE void __kmpc_impl_syncwarp(int32_t Mask) { __SYNCWARP(Mask); } +INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) { +#if CUDA_VERSION >= 9000 + __syncwarp(Mask); +#else + // In Cuda < 9.0 no need to sync threads in warps. +#endif // CUDA_VERSION +} #endif