Index: libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c =================================================================== --- /dev/null +++ libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c @@ -0,0 +1,22 @@ +// RUN: %compile-run-and-check + +#include +#include + +int main() { + int res = 0; + +#pragma omp parallel num_threads(2) reduction(+:res) + { + int tid = omp_get_thread_num(); +#pragma omp target teams distribute reduction(+:res) + for (int i = tid; i < 2; i++) + ++res; + } + // The first thread makes 2 iterations, the second - 1. Expected result of the + // reduction res is 3. + + // CHECK: res = 3. + printf("res = %d.\n", res); + return 0; +} Index: libomptarget/src/device.h =================================================================== --- libomptarget/src/device.h +++ libomptarget/src/device.h @@ -18,6 +18,7 @@ #include #include #include +#include #include // Forward declarations. @@ -96,7 +97,7 @@ std::mutex DataMapMtx, PendingGlobalsMtx, ShadowMtx; - uint64_t loopTripCnt; + std::map loopTripCnt; int64_t RTLRequiresFlags; @@ -104,7 +105,7 @@ : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), - ShadowMtx(), loopTripCnt(0), RTLRequiresFlags(0) {} + ShadowMtx(), RTLRequiresFlags(0) {} // The existence of mutexes makes DeviceTy non-copyable. We need to // provide a copy constructor and an assignment operator explicitly. Index: libomptarget/src/interface.cpp =================================================================== --- libomptarget/src/interface.cpp +++ libomptarget/src/interface.cpp @@ -20,6 +20,7 @@ #include #include #include +#include // Store target policy (disabled, mandatory, default) kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default; @@ -305,7 +306,6 @@ } -// The trip count mechanism will be revised - this scheme is not thread-safe. EXTERN void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount) { if (device_id == OFFLOAD_DEVICE_DEFAULT) { @@ -320,5 +320,8 @@ DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id, loop_tripcount); - Devices[device_id].loopTripCnt = loop_tripcount; + TblMapMtx.lock(); + Devices[device_id].loopTripCnt.emplace(std::this_thread::get_id(), + loop_tripcount); + TblMapMtx.unlock(); } Index: libomptarget/src/omptarget.cpp =================================================================== --- libomptarget/src/omptarget.cpp +++ libomptarget/src/omptarget.cpp @@ -18,6 +18,7 @@ #include "rtl.h" #include +#include #include #ifdef OMPTARGET_DEBUG @@ -729,8 +730,14 @@ "Size mismatch in arguments and offsets"); // Pop loop trip count - uint64_t ltc = Device.loopTripCnt; - Device.loopTripCnt = 0; + uint64_t ltc = 0; + TblMapMtx.lock(); + auto I = Device.loopTripCnt.find(std::this_thread::get_id()); + if (I != Device.loopTripCnt.end()) { + ltc = I->second; + I->second = 0; + } + TblMapMtx.unlock(); // Launch device execution. DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",