Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c @@ -0,0 +1,22 @@ +// RUN: %compile-run-and-check + +#include +#include + +int main() { + int res = 0; + +#pragma omp parallel num_threads(2) reduction(+:res) + { + int tid = omp_get_thread_num(); +#pragma omp target teams distribute reduction(+:res) + for (int i = tid; i < 2; i++) + ++res; + } + // The first thread makes 2 iterations, the second - 1. Expected result of the + // reduction res is 3. + + // CHECK: res = 3. + printf("res = %d.\n", res); + return 0; +} Index: openmp/trunk/libomptarget/src/device.h =================================================================== --- openmp/trunk/libomptarget/src/device.h +++ openmp/trunk/libomptarget/src/device.h @@ -96,7 +96,9 @@ std::mutex DataMapMtx, PendingGlobalsMtx, ShadowMtx; - uint64_t loopTripCnt; + // NOTE: Once libomp gains full target-task support, this state should be + // moved into the target task in libomp. + std::map loopTripCnt; int64_t RTLRequiresFlags; @@ -104,7 +106,7 @@ : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), - ShadowMtx(), loopTripCnt(0), RTLRequiresFlags(0) {} + ShadowMtx(), RTLRequiresFlags(0) {} // The existence of mutexes makes DeviceTy non-copyable. We need to // provide a copy constructor and an assignment operator explicitly. Index: openmp/trunk/libomptarget/src/interface.cpp =================================================================== --- openmp/trunk/libomptarget/src/interface.cpp +++ openmp/trunk/libomptarget/src/interface.cpp @@ -304,8 +304,6 @@ arg_sizes, arg_types, team_num, thread_limit); } - -// The trip count mechanism will be revised - this scheme is not thread-safe. EXTERN void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount) { if (device_id == OFFLOAD_DEVICE_DEFAULT) { @@ -320,5 +318,8 @@ DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id, loop_tripcount); - Devices[device_id].loopTripCnt = loop_tripcount; + TblMapMtx.lock(); + Devices[device_id].loopTripCnt.emplace(__kmpc_global_thread_num(NULL), + loop_tripcount); + TblMapMtx.unlock(); } Index: openmp/trunk/libomptarget/src/omptarget.cpp =================================================================== --- openmp/trunk/libomptarget/src/omptarget.cpp +++ openmp/trunk/libomptarget/src/omptarget.cpp @@ -729,8 +729,12 @@ "Size mismatch in arguments and offsets"); // Pop loop trip count - uint64_t ltc = Device.loopTripCnt; - Device.loopTripCnt = 0; + uint64_t ltc = 0; + TblMapMtx.lock(); + auto I = Device.loopTripCnt.find(__kmpc_global_thread_num(NULL)); + if (I != Device.loopTripCnt.end()) + std::swap(ltc, I->second); + TblMapMtx.unlock(); // Launch device execution. DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", Index: openmp/trunk/libomptarget/src/private.h =================================================================== --- openmp/trunk/libomptarget/src/private.h +++ openmp/trunk/libomptarget/src/private.h @@ -65,6 +65,7 @@ // functions that extract info from libomp; keep in sync int omp_get_default_device(void) __attribute__((weak)); int32_t __kmpc_omp_taskwait(void *loc_ref, int32_t gtid) __attribute__((weak)); +int32_t __kmpc_global_thread_num(void *) __attribute__((weak)); int __kmpc_get_target_offload(void) __attribute__((weak)); #ifdef __cplusplus }