diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "Debug.h" @@ -218,7 +219,8 @@ /// A generic pool of resources where \p T is the resource type. /// \p T should be copyable as the object is stored in \p std::vector . -template class ResourcePoolTy { +/// \p NeedTracking is true if we need to track all elements that are given. +template class ResourcePoolTy { using ElementTy = typename AllocTy::ElementTy; /// Index of the next available resource. size_t Next = 0; @@ -226,6 +228,9 @@ std::mutex Mutex; /// Pool of resources. std::vector Resources; + /// Resources that have been given at least once. We don't remove the element + /// when a resource is released. + std::unordered_set ResourcesGiven; /// A reference to the corresponding allocator. AllocTy Allocator; @@ -277,6 +282,9 @@ R = Resources[Next++]; + if (NeedTracking) + ResourcesGiven.insert(R); + return OFFLOAD_SUCCESS; } @@ -300,9 +308,18 @@ /// Released all stored resources and clear the pool. /// Note: This function is not thread safe. Be sure to guard it if necessary. void clear() noexcept { - for (auto &R : Resources) - (void)Allocator.destroy(R); - Resources.clear(); + if (NeedTracking) { + for (auto &R : Resources) + ResourcesGiven.insert(R); + Resources.clear(); + for (auto &R : ResourcesGiven) + (void)Allocator.destroy(R); + ResourcesGiven.clear(); + } else { + for (auto &R : Resources) + (void)Allocator.destroy(R); + Resources.clear(); + } } }; @@ -332,7 +349,7 @@ using StreamPoolTy = ResourcePoolTy; std::vector> StreamPool; - using EventPoolTy = ResourcePoolTy; + using EventPoolTy = ResourcePoolTy; std::vector> EventPool; std::vector DeviceData;