diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -114,21 +114,6 @@
   return OFFLOAD_SUCCESS;
 }
 
-int createEvent(void **P) {
-  CUevent Event = nullptr;
-
-  CUresult Err = cuEventCreate(&Event, CU_EVENT_DEFAULT);
-  if (Err != CUDA_SUCCESS) {
-    DP("Error when creating event event = " DPxMOD "\n", DPxPTR(Event));
-    CUDA_ERR_STRING(Err);
-    return OFFLOAD_FAIL;
-  }
-
-  *P = Event;
-
-  return OFFLOAD_SUCCESS;
-}
-
 int recordEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
   CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo->Queue);
   CUevent Event = reinterpret_cast<CUevent>(EventPtr);
@@ -157,19 +142,6 @@
   return OFFLOAD_SUCCESS;
 }
 
-int destroyEvent(void *EventPtr) {
-  CUevent Event = reinterpret_cast<CUevent>(EventPtr);
-
-  CUresult Err = cuEventDestroy(Event);
-  if (Err != CUDA_SUCCESS) {
-    DP("Error when destroying event = " DPxMOD "\n", DPxPTR(Event));
-    CUDA_ERR_STRING(Err);
-    return OFFLOAD_FAIL;
-  }
-
-  return OFFLOAD_SUCCESS;
-}
-
 // Structure contains per-device data
 struct DeviceDataTy {
   /// List that contains all the kernels.
@@ -231,6 +203,28 @@
   }
 };
 
+/// Allocator for CUevent.
+template <> class AllocatorTy<CUevent> {
+public:
+  /// See AllocatorTy<T>::create.
+  int create(CUevent &Event) noexcept {
+    if (!checkResult(cuEventCreate(&Event, CU_EVENT_DEFAULT),
+                     "Error returned from cuEventCreate\n"))
+      return OFFLOAD_FAIL;
+
+    return OFFLOAD_SUCCESS;
+  }
+
+  /// See AllocatorTy<T>::destroy.
+  int destroy(CUevent Event) noexcept {
+    if (!checkResult(cuEventDestroy(Event),
+                     "Error returned from cuEventDestroy\n"))
+      return OFFLOAD_FAIL;
+
+    return OFFLOAD_SUCCESS;
+  }
+};
+
 /// A generic pool of resources where \p T is the resource type.
 /// \p T should be copyable as the object is stored in \p std::vector .
 template <typename T> class ResourcePoolTy {
@@ -265,10 +259,7 @@
       (void)resize(Size);
   }
 
-  ~ResourcePoolTy() noexcept {
-    for (auto &R : Resources)
-      (void)Allocator.destroy(R);
-  }
+  ~ResourcePoolTy() noexcept { clear(); }
 
   /// Get a resource from pool. `Next` always points to the next available
   /// resource. That means, `[0, next-1]` have been assigned, and `[id,]` are
@@ -307,6 +298,14 @@
     std::lock_guard<std::mutex> LG(Mutex);
     Resources[--Next] = R;
   }
+
+  /// Clear all stored resources.
+  void clear() noexcept {
+    for (auto &R : Resources)
+      (void)Allocator.destroy(R);
+
+    Resources.clear();
+  }
 };
 
 class DeviceRTLTy {
@@ -331,6 +330,8 @@
   using StreamAllocatorTy = AllocatorTy<CUstream>;
   std::vector<std::unique_ptr<StreamPoolTy>> StreamPool;
 
+  ResourcePoolTy<CUevent> EventPool;
+
   std::vector<DeviceDataTy> DeviceData;
   std::vector<CUmodule> Modules;
 
@@ -483,7 +484,7 @@
   DeviceRTLTy()
       : NumberOfDevices(0), EnvNumTeams(-1), EnvTeamLimit(-1),
         EnvTeamThreadLimit(-1), RequiresFlags(OMP_REQ_UNDEFINED),
-        DynamicMemorySize(0) {
+        DynamicMemorySize(0), EventPool(AllocatorTy<CUevent>(), 8) {
 
     DP("Start initializing CUDA\n");
 
@@ -565,6 +566,8 @@
     for (auto &S : StreamPool)
      S = nullptr;
 
+    EventPool.clear();
+
     for (DeviceDataTy &D : DeviceData) {
       // Destroy context
       if (D.Context) {
@@ -1384,6 +1387,19 @@
     printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2);
   }
 
+  int createEvent(void **P) {
+    CUevent Event = nullptr;
+    if (EventPool.acquire(Event) != OFFLOAD_SUCCESS)
+      return OFFLOAD_FAIL;
+    *P = Event;
+    return OFFLOAD_SUCCESS;
+  }
+
+  int destroyEvent(void *EventPtr) {
+    EventPool.release(reinterpret_cast<CUevent>(EventPtr));
+    return OFFLOAD_SUCCESS;
+  }
+
   int waitEvent(const int DeviceId, __tgt_async_info *AsyncInfo,
                 void *EventPtr) const {
     CUstream Stream = getStream(DeviceId, AsyncInfo);
@@ -1609,7 +1625,7 @@
 int32_t __tgt_rtl_create_event(int32_t device_id, void **event) {
   assert(event && "event is nullptr");
 
-  return createEvent(event);
+  return DeviceRTL.createEvent(event);
 }
 
 int32_t __tgt_rtl_record_event(int32_t device_id, void *event_ptr,
@@ -1639,7 +1655,7 @@
 int32_t __tgt_rtl_destroy_event(int32_t device_id, void *event_ptr) {
   assert(event_ptr && "event is nullptr");
 
-  return destroyEvent(event_ptr);
+  return DeviceRTL.destroyEvent(event_ptr);
 }
 
 #ifdef __cplusplus
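For orientation, here is a minimal standalone sketch (not part of the patch) of the acquire/release/clear pool semantics the change relies on: createEvent now hands out Resources[Next++], destroyEvent recycles a resource via Resources[--Next], and the actual cuEventDestroy calls only happen in clear() at teardown. PoolTy, DummyAllocatorTy, the plain int handle, and the bool return codes are illustrative stand-ins for ResourcePoolTy, AllocatorTy<CUevent>, CUevent, and OFFLOAD_SUCCESS/OFFLOAD_FAIL.

// Illustrative sketch only -- not part of the patch above.
#include <cstdio>
#include <mutex>
#include <vector>

struct DummyAllocatorTy {
  int NextHandle = 0;
  bool create(int &R) noexcept {
    R = NextHandle++; // Hand out a fresh "event" handle.
    return true;
  }
  bool destroy(int) noexcept { return true; }
};

template <typename T, typename AllocTy> class PoolTy {
  size_t Next = 0;          // Resources[0, Next) are currently handed out.
  std::mutex Mutex;
  std::vector<T> Resources; // Pre-created resources, reused across calls.
  AllocTy Allocator;

  bool resize(size_t Size) {
    size_t Old = Resources.size();
    Resources.resize(Size);
    for (size_t I = Old; I < Size; ++I)
      if (!Allocator.create(Resources[I]))
        return false;
    return true;
  }

public:
  PoolTy(AllocTy A, size_t Size = 0) : Allocator(A) {
    if (Size)
      (void)resize(Size);
  }
  ~PoolTy() { clear(); }

  // createEvent path: grow (doubling) only when the pool is exhausted.
  bool acquire(T &R) {
    std::lock_guard<std::mutex> LG(Mutex);
    if (Next == Resources.size() &&
        !resize(Resources.empty() ? 1 : Resources.size() * 2))
      return false;
    R = Resources[Next++];
    return true;
  }

  // destroyEvent path: the resource is recycled, not destroyed.
  void release(T R) {
    std::lock_guard<std::mutex> LG(Mutex);
    Resources[--Next] = R;
  }

  // Deinit path: destroy everything that was ever created.
  void clear() {
    for (auto &R : Resources)
      (void)Allocator.destroy(R);
    Resources.clear();
  }
};

int main() {
  PoolTy<int, DummyAllocatorTy> EventPool(DummyAllocatorTy(), 8);
  int E = -1;
  if (EventPool.acquire(E))
    std::printf("acquired handle %d\n", E);
  EventPool.release(E);
  return 0; // ~PoolTy() calls clear().
}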