diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt --- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt @@ -57,12 +57,13 @@ set(cuda_sources ${devicertl_base_directory}/common/src/cancel.cu ${devicertl_base_directory}/common/src/critical.cu - ${devicertl_base_directory}/common/src/loop.cu + ${devicertl_base_directory}/common/src/data_sharing.cu ${devicertl_base_directory}/common/src/libcall.cu - ${devicertl_base_directory}/common/src/reduction.cu + ${devicertl_base_directory}/common/src/loop.cu ${devicertl_base_directory}/common/src/omp_data.cu ${devicertl_base_directory}/common/src/omptarget.cu ${devicertl_base_directory}/common/src/parallel.cu + ${devicertl_base_directory}/common/src/reduction.cu ${devicertl_base_directory}/common/src/sync.cu ${devicertl_base_directory}/common/src/task.cu) diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h @@ -101,6 +101,8 @@ return __lanemask_gt(); } +EXTERN bool __kmpc_impl_is_first_active_thread(); + INLINE uint32_t __kmpc_impl_smid() { return __smid(); } diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu rename from openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu rename to openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu @@ -1,4 +1,4 @@ -//===----- data_sharing.cu - NVPTX OpenMP debug utilities -------- CUDA -*-===// +//===----- data_sharing.cu - OpenMP GPU data sharing ------------- CUDA -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,21 +6,13 @@ // //===----------------------------------------------------------------------===// // -// This file contains the implementation of data sharing environments/ +// This file contains the implementation of data sharing environments // //===----------------------------------------------------------------------===// #include "common/omptarget.h" #include "target_impl.h" #include -// Return true if this is the first active thread in the warp. -INLINE static bool IsWarpMasterActiveThread() { - unsigned long long Mask = __kmpc_impl_activemask(); - unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE); - unsigned long long Sh = Mask << ShNum; - // Truncate Sh to the 32 lower bits - return (unsigned)Sh == 0; -} // Return true if this is the master thread. INLINE static bool IsMasterThread(bool isSPMDExecutionMode) { return !isSPMDExecutionMode && GetMasterThreadID() == GetThreadIdInBlock(); @@ -128,7 +120,7 @@ DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT); // Only the warp active master needs to grow the stack. - if (IsWarpMasterActiveThread()) { + if (__kmpc_impl_is_first_active_thread()) { // Save the current active threads. ActiveT = CurActiveThreads; @@ -229,7 +221,7 @@ unsigned WID = GetWarpId(); if (IsEntryPoint) { - if (IsWarpMasterActiveThread()) { + if (__kmpc_impl_is_first_active_thread()) { DSPRINT0(DSFLAG, "Doing clean up\n"); // The master thread cleans the saved slot, because this is an environment @@ -255,7 +247,7 @@ // warp diverged and returns in different places). This only works if we // assume that threads will converge right after the call site that started // the environment. - if (IsWarpMasterActiveThread()) { + if (__kmpc_impl_is_first_active_thread()) { __kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID]; DSPRINT0(DSFLAG, "Before restoring the stack\n"); diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -53,9 +53,8 @@ set(cuda_src_files ${devicertl_common_directory}/src/cancel.cu ${devicertl_common_directory}/src/critical.cu - src/data_sharing.cu + ${devicertl_common_directory}/src/data_sharing.cu ${devicertl_common_directory}/src/libcall.cu - src/target_impl.cu ${devicertl_common_directory}/src/loop.cu ${devicertl_common_directory}/src/omptarget.cu ${devicertl_common_directory}/src/parallel.cu @@ -63,6 +62,7 @@ ${devicertl_common_directory}/src/support.cu ${devicertl_common_directory}/src/sync.cu ${devicertl_common_directory}/src/task.cu + src/target_impl.cu ) set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -94,6 +94,15 @@ return res; } +// Return true if this is the first active thread in the warp. +INLINE bool __kmpc_impl_is_first_active_thread() { + unsigned long long Mask = __kmpc_impl_activemask(); + unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE); + unsigned long long Sh = Mask << ShNum; + // Truncate Sh to the 32 lower bits + return (unsigned)Sh == 0; +} + INLINE uint32_t __kmpc_impl_smid() { uint32_t id; asm("mov.u32 %0, %%smid;" : "=r"(id));