Index: openmp/libomptarget/plugins/cuda/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -367,6 +367,7 @@
 /// allocate and free memory.
 class CUDADeviceAllocatorTy : public DeviceAllocatorTy {
   std::unordered_map<void *, TargetAllocTy> HostPinnedAllocs;
+  std::mutex HostPinnedAllocsMutex;
 
 public:
   void *allocate(size_t Size, void *, TargetAllocTy Kind) override {
@@ -390,7 +391,10 @@
       MemAlloc = HostPtr;
       if (!checkResult(Err, "Error returned from cuMemAllocHost\n"))
         return nullptr;
-      HostPinnedAllocs[MemAlloc] = Kind;
+      {
+        std::lock_guard<std::mutex> Lock(HostPinnedAllocsMutex);
+        HostPinnedAllocs[MemAlloc] = Kind;
+      }
       break;
     case TARGET_ALLOC_SHARED:
       CUdeviceptr SharedPtr;
@@ -406,11 +410,14 @@
 
   int free(void *TgtPtr) override {
     CUresult Err;
-    // Host pinned memory must be freed differently.
-    TargetAllocTy Kind =
-        (HostPinnedAllocs.find(TgtPtr) == HostPinnedAllocs.end())
-            ? TARGET_ALLOC_DEFAULT
-            : TARGET_ALLOC_HOST;
+    TargetAllocTy Kind;
+    {
+      std::lock_guard<std::mutex> Lock(HostPinnedAllocsMutex);
+      // Host pinned memory must be freed differently.
+      Kind = (HostPinnedAllocs.find(TgtPtr) == HostPinnedAllocs.end())
+                 ? TARGET_ALLOC_DEFAULT : TARGET_ALLOC_HOST;
+    }
+
     switch (Kind) {
     case TARGET_ALLOC_DEFAULT:
     case TARGET_ALLOC_DEVICE:
@@ -564,8 +571,13 @@
       DP("Parsed LIBOMPTARGET_NUM_INITIAL_STREAMS=%d\n", NumInitialStreams);
     }
 
-    for (int I = 0; I < NumberOfDevices; ++I)
-      DeviceAllocators.emplace_back();
+    // The CUDADeviceAllocatorTy has a std::mutex and these cannot be moved or
+    // copied. The swap operation guarantees no move, copy or swap on the
+    // individual elements.
+    {
+      std::vector<CUDADeviceAllocatorTy> DeviceAllocatorsTemp(NumberOfDevices);
+      DeviceAllocators.swap(DeviceAllocatorsTemp);
+    }
 
     // Get the size threshold from environment variable
     std::pair<size_t, bool> Res = MemoryManagerTy::getSizeThresholdFromEnv();