diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -119,12 +119,23 @@
       *EntriesEnd; // End of the table with all the entries (non inclusive)
 };
 
+// clang-format on
+
+struct DeviceTy;
+
 /// This struct contains information exchanged between different asynchronous
 /// operations for device-dependent optimization and potential synchronization
 struct __tgt_async_info {
-  // A pointer to a queue-like structure where offloading operations are issued.
-  // We assume to use this structure to do synchronization. In CUDA backend, it
-  // is CUstream.
+  /// Synchronize the queue with \p Device, if necessary, and reset it to
+  /// indicate no pending actions. A synchronization failure is reported.
+  ///
+  /// \returns OFFLOAD_FAIL or OFFLOAD_SUCCESS appropriately.
+  int synchronize(DeviceTy &Device);
+
+  /// A pointer to a queue-like structure where offloading operations are
+  /// issued. This structure is assumed to be used for synchronization; in the
+  /// CUDA backend it is a CUstream. A nullptr means there are no pending
+  /// actions that still need to be synchronized.
   void *Queue = nullptr;
 };
 
@@ -135,8 +146,6 @@
   uint64_t Stride;
 };
 
-// clang-format on
-
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -19,6 +19,17 @@
 #include <cassert>
 #include <vector>
 
+int __tgt_async_info::synchronize(DeviceTy &Device) {
+  if (!Queue)
+    return OFFLOAD_SUCCESS;
+  // Synchronize the pending queue, then reset it: nullptr means none pending.
+  int Result = Device.synchronize(this);
+  Queue = nullptr;
+  if (Result != OFFLOAD_SUCCESS)
+    REPORT("Failed to synchronize device.\n");
+  return Result;
+}
+
 /* All begin addresses for partially mapped structs must be 8-aligned in order
  * to ensure proper alignment of members. E.g.
  *
@@ -452,17 +463,6 @@
       : HstPtrBegin(HstPtr), DataSize(Size), ForceDelete(ForceDelete),
         HasCloseModifier(HasCloseModifier) {}
 };
-
-/// Synchronize device
-static int syncDevice(DeviceTy &Device, __tgt_async_info *AsyncInfo) {
-  assert(AsyncInfo && AsyncInfo->Queue && "Invalid AsyncInfo");
-  if (Device.synchronize(AsyncInfo) != OFFLOAD_SUCCESS) {
-    REPORT("Failed to synchronize device.\n");
-    return OFFLOAD_FAIL;
-  }
-
-  return OFFLOAD_SUCCESS;
-}
 } // namespace
 
 /// Internal function to undo the mapping and retrieve the data from the device.
@@ -642,8 +642,8 @@
   // nullptr, there is no data transfer happened because once there is,
   // AsyncInfo->Queue will not be nullptr, so again, we don't need to
   // synchronize.
-  if (AsyncInfo && AsyncInfo->Queue) {
-    Ret = syncDevice(Device, AsyncInfo);
+  if (AsyncInfo) {
+    Ret = AsyncInfo->synchronize(Device);
     if (Ret != OFFLOAD_SUCCESS)
       return OFFLOAD_FAIL;
   }
@@ -1322,11 +1322,13 @@
       REPORT("Failed to process data after launching the kernel.\n");
       return OFFLOAD_FAIL;
     }
-  } else if (AsyncInfo->Queue) {
+  } else {
+    // TODO: We should not synchronize here but on the outer level once we pass
+    // in a reference AsyncInfo object.
     // If ArgNum is zero, but AsyncInfo.Queue is valid, then the kernel doesn't
     // hava any argument, and the device supports async operations, so we need a
     // sync at this point.
-    return syncDevice(Device, AsyncInfo);
+    return AsyncInfo->synchronize(Device);
   }
 
   return OFFLOAD_SUCCESS;