Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -564,7 +564,8 @@
     if (GetThreadIdInBlock() == 0) {
       *frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size);
     }
-    __syncthreads();
+    // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+    __SYNCTHREADS();
     return;
   }
   ASSERT0(LT_FUSSY, GetThreadIdInBlock() == getMasterThreadId(),
@@ -577,7 +578,8 @@
   if (is_shared)
     return;
   if (isSPMDMode()) {
-    __syncthreads();
+    // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+    __SYNCTHREADS();
     if (GetThreadIdInBlock() == 0) {
       omptarget_nvptx_simpleMemoryManager.Release();
     }
Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -63,6 +63,11 @@
 #define __ACTIVEMASK() __ballot(1)
 #endif
 
+// Barrier implemented as inline PTX `bar.sync` on barrier number n. The macros
+// expand without a trailing semicolon; the call site supplies it.
+#define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory")
+#define __SYNCTHREADS() __SYNCTHREADS_N(0)
+
 // arguments needed for L0 parallelism only.
 class omptarget_nvptx_SharedArgs {
 public:
Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
@@ -105,7 +105,8 @@
       omptarget_nvptx_simpleThreadPrivateContext =
           omptarget_nvptx_device_simpleState[slot].Dequeue();
     }
-    __syncthreads();
+    // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+    __SYNCTHREADS();
     omptarget_nvptx_simpleThreadPrivateContext->Init();
     return;
   }
@@ -129,7 +130,8 @@
     // init team context
     currTeamDescr.InitTeamDescr();
   }
-  __syncthreads();
+  // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+  __SYNCTHREADS();
 
   omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
   omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
@@ -170,7 +172,8 @@
 EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
   // We're not going to pop the task descr stack of each thread since
   // there are no more parallel regions in SPMD mode.
-  __syncthreads();
+  // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+  __SYNCTHREADS();
   int threadId = GetThreadIdInBlock();
   if (!RequiresOMPRuntime) {
     if (threadId == 0) {
Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -74,7 +74,8 @@
 // parallel region and that all worker threads participate.
 EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
   PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
-  __syncthreads();
+  // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+  __SYNCTHREADS();
   PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
 }
 