diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -119,12 +119,30 @@
       *EntriesEnd; // End of the table with all the entries (non inclusive)
 };
 
+struct DeviceTy;
+
 /// This struct contains information exchanged between different asynchronous
 /// operations for device-dependent optimization and potential synchronization
 struct __tgt_async_info {
-  // A pointer to a queue-like structure where offloading operations are issued.
-  // We assume to use this structure to do synchronization. In CUDA backend, it
-  // is CUstream.
+  explicit __tgt_async_info(DeviceTy &Device) : Device(Device) {}
+
+  /// The destructor ensures synchronization. A failure cannot be reported from
+  /// here, so call synchronize() explicitly whenever the result matters.
+  ~__tgt_async_info() { synchronize(); }
+
+  /// Synchronize the queue, if necessary, and reset it to indicate that there
+  /// are no pending actions.
+  ///
+  /// \returns OFFLOAD_FAIL or OFFLOAD_SUCCESS appropriately.
+  int synchronize();
+
+  /// The device this object was created for.
+  DeviceTy &Device;
+
+  /// A pointer to a queue-like structure where offloading operations are
+  /// issued. We assume to use this structure to do synchronization. In CUDA
+  /// backend, it is CUstream. If it is a nullptr there are no pending actions
+  /// that are not synchronized yet.
   void *Queue = nullptr;
 };
 
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -19,6 +19,23 @@
 #include
 #include
 
+int __tgt_async_info::synchronize() {
+  // Without a queue there are no pending actions, hence nothing to wait for.
+  if (!Queue)
+    return OFFLOAD_SUCCESS;
+
+  // Synchronize the queue and drop it afterwards; a null queue indicates that
+  // there are no pending actions.
+  int Result = Device.synchronize(this);
+  Queue = nullptr;
+  if (Result != OFFLOAD_SUCCESS) {
+    REPORT("Failed to synchronize device.\n");
+    return OFFLOAD_FAIL;
+  }
+
+  return OFFLOAD_SUCCESS;
+}
+
 /* All begin addresses for partially mapped structs must be 8-aligned in order
  * to ensure proper alignment of members. E.g.
  *
@@ -452,17 +469,6 @@
       : HstPtrBegin(HstPtr), DataSize(Size), ForceDelete(ForceDelete),
         HasCloseModifier(HasCloseModifier) {}
 };
-
-/// Synchronize device
-static int syncDevice(DeviceTy &Device, __tgt_async_info *AsyncInfo) {
-  assert(AsyncInfo && AsyncInfo->Queue && "Invalid AsyncInfo");
-  if (Device.synchronize(AsyncInfo) != OFFLOAD_SUCCESS) {
-    REPORT("Failed to synchronize device.\n");
-    return OFFLOAD_FAIL;
-  }
-
-  return OFFLOAD_SUCCESS;
-}
 } // namespace
 
 /// Internal function to undo the mapping and retrieve the data from the device.
@@ -642,8 +648,8 @@
   // nullptr, there is no data transfer happened because once there is,
   // AsyncInfo->Queue will not be nullptr, so again, we don't need to
   // synchronize.
-  if (AsyncInfo && AsyncInfo->Queue) {
-    Ret = syncDevice(Device, AsyncInfo);
+  if (AsyncInfo) {
+    Ret = AsyncInfo->synchronize();
     if (Ret != OFFLOAD_SUCCESS)
       return OFFLOAD_FAIL;
   }
@@ -1266,7 +1272,7 @@
 
   // TODO: This will go away as soon as we consequently pass in async info
   // objects (as references).
-  __tgt_async_info InternalAsyncInfo;
+  __tgt_async_info InternalAsyncInfo(Device);
   if (!AsyncInfo)
     AsyncInfo = &InternalAsyncInfo;
 
@@ -1322,11 +1328,10 @@
       REPORT("Failed to process data after launching the kernel.\n");
       return OFFLOAD_FAIL;
     }
-  } else if (AsyncInfo->Queue) {
-    // If ArgNum is zero, but AsyncInfo.Queue is valid, then the kernel doesn't
-    // hava any argument, and the device supports async operations, so we need a
-    // sync at this point.
-    return syncDevice(Device, AsyncInfo);
+  } else if (AsyncInfo->synchronize() != OFFLOAD_SUCCESS) {
+    // If ArgNum is zero the kernel has no arguments, so we synchronize here.
+    // The destructor of the async info object cannot report a failure.
+    return OFFLOAD_FAIL;
   }
 
   return OFFLOAD_SUCCESS;