diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -21,6 +21,7 @@ #include #define OFFLOAD_SUCCESS (0) +#define OFFLOAD_EVENT_NOT_READY 600 #define OFFLOAD_FAIL (~0) #define OFFLOAD_DEVICE_DEFAULT -1 diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h --- a/openmp/libomptarget/include/omptargetplugin.h +++ b/openmp/libomptarget/include/omptargetplugin.h @@ -151,18 +151,22 @@ // 2) Record the event based on the status of \p AsyncInfo->Queue at the moment // of function call to __tgt_rtl_record_event. An event becomes "meaningful" // once it is recorded, such that others can depend on it. -// 3) Call __tgt_rtl_wait_event to set dependence on the event. Whether the +// 3) Call __tgt_rtl_query_event to query the status of all work currently +// captured by an event. +// 4) Call __tgt_rtl_wait_event to set dependence on the event. Whether the // operation is blocking or non-blocking depends on the target. It is expected // to be non-blocking, just set dependence and return. -// 4) Call __tgt_rtl_sync_event to sync the event. It is expected to block the +// 5) Call __tgt_rtl_sync_event to sync the event. It is expected to block the // thread calling the function. -// 5) Destroy the event (__tgt_rtl_destroy_event). +// 6) Destroy the event (__tgt_rtl_destroy_event). 
// { int32_t __tgt_rtl_create_event(int32_t ID, void **Event); int32_t __tgt_rtl_record_event(int32_t ID, void *Event, __tgt_async_info *AsyncInfo); +int32_t __tgt_rtl_query_event(int32_t ID, void *Event); + int32_t __tgt_rtl_wait_event(int32_t ID, void *Event, __tgt_async_info *AsyncInfo); diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h --- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h +++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h @@ -28,6 +28,7 @@ CUDA_SUCCESS = 0, CUDA_ERROR_INVALID_VALUE = 1, CUDA_ERROR_INVALID_HANDLE = 400, + CUDA_ERROR_NOT_READY = 600, } CUresult; typedef enum CUstream_flags_enum { @@ -258,6 +259,7 @@ CUresult cuEventCreate(CUevent *, unsigned int); CUresult cuEventRecord(CUevent, CUstream); +CUresult cuEventQuery(CUevent); CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int); CUresult cuEventSynchronize(CUevent); CUresult cuEventDestroy(CUevent); diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp --- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp +++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp @@ -71,6 +71,7 @@ DLWRAP(cuEventCreate, 2); DLWRAP(cuEventRecord, 2); +DLWRAP(cuEventQuery, 1); DLWRAP(cuStreamWaitEvent, 3); DLWRAP(cuEventSynchronize, 1); DLWRAP(cuEventDestroy, 1); diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -159,6 +159,24 @@ return OFFLOAD_SUCCESS; } +int queryEvent(void *EventPtr) { + CUevent Event = reinterpret_cast<CUevent>(EventPtr); + CUresult Err = cuEventQuery(Event); + + if (Err == CUDA_ERROR_NOT_READY) { + DP("Captured work is incomplete. 
Event = " DPxMOD "\n", DPxPTR(Event)); + return OFFLOAD_EVENT_NOT_READY; + } + + if (Err != CUDA_SUCCESS) { + DP("Error when querying an event. Event = " DPxMOD "\n", DPxPTR(Event)); + CUDA_ERR_STRING(Err); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} + int syncEvent(void *EventPtr) { CUevent Event = reinterpret_cast<CUevent>(EventPtr); @@ -1621,11 +1639,16 @@ __tgt_async_info *async_info_ptr) { assert(async_info_ptr && "async_info_ptr is nullptr"); assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr"); - assert(event_ptr && "event_ptr is nullptr"); + assert(event_ptr && "event is nullptr"); return recordEvent(event_ptr, async_info_ptr); } +int32_t __tgt_rtl_query_event(int32_t device_id, void *event_ptr) { + assert(event_ptr && "event is nullptr"); + return queryEvent(event_ptr); +} + int32_t __tgt_rtl_wait_event(int32_t device_id, void *event_ptr, __tgt_async_info *async_info_ptr) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); diff --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports --- a/openmp/libomptarget/plugins/exports +++ b/openmp/libomptarget/plugins/exports @@ -26,6 +26,7 @@ __tgt_rtl_print_device_info; __tgt_rtl_create_event; __tgt_rtl_record_event; + __tgt_rtl_query_event; __tgt_rtl_wait_event; __tgt_rtl_sync_event; __tgt_rtl_destroy_event; diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -250,6 +250,7 @@ bool IsInit; std::once_flag InitFlag; bool HasPendingGlobals; + bool HasEventSupport; HostDataToTargetListTy HostDataToTargetMap; PendingCtorsDtorsPerLibrary PendingCtorsDtors; @@ -356,6 +357,9 @@ /// Create an event. int32_t createEvent(void **Event); + /// Query the status of an event. + int32_t queryEvent(void *Event); + /// Record the event based on status in AsyncInfo->Queue at the moment the /// function is called. 
int32_t recordEvent(void *Event, AsyncInfoTy &AsyncInfo); diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -21,8 +21,10 @@ DeviceTy::DeviceTy(RTLInfoTy *RTL) : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), - HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(), - ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), ShadowMtx() {} + HasPendingGlobals(false), + HasEventSupport(RTL->create_event ? true : false), HostDataToTargetMap(), + PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), + ShadowMtx() {} DeviceTy::~DeviceTy() { if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)) @@ -561,6 +563,13 @@ return OFFLOAD_SUCCESS; } +int32_t DeviceTy::queryEvent(void *Event) { + if (RTL->query_event) + return RTL->query_event(RTLDeviceID, Event); + + return OFFLOAD_SUCCESS; +} + int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) { if (RTL->wait_event) return RTL->wait_event(RTLDeviceID, Event, AsyncInfo); diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h --- a/openmp/libomptarget/src/rtl.h +++ b/openmp/libomptarget/src/rtl.h @@ -59,6 +59,7 @@ typedef void(set_info_flag_ty)(uint32_t); typedef int32_t(create_event_ty)(int32_t, void **); typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *); + typedef int32_t(query_event_ty)(int32_t, void *); typedef int32_t(wait_event_ty)(int32_t, void *, __tgt_async_info *); typedef int32_t(sync_event_ty)(int32_t, void *); typedef int32_t(destroy_event_ty)(int32_t, void *); @@ -102,6 +103,7 @@ print_device_info_ty *print_device_info = nullptr; create_event_ty *create_event = nullptr; record_event_ty *record_event = nullptr; + query_event_ty *query_event = nullptr; wait_event_ty *wait_event = nullptr; sync_event_ty *sync_event = nullptr; destroy_event_ty *destroy_event = nullptr; diff --git 
a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -187,6 +187,7 @@ dlsym(dynlib_handle, "__tgt_rtl_create_event"); *((void **)&R.record_event) = dlsym(dynlib_handle, "__tgt_rtl_record_event"); + *((void **)&R.query_event) = dlsym(dynlib_handle, "__tgt_rtl_query_event"); *((void **)&R.wait_event) = dlsym(dynlib_handle, "__tgt_rtl_wait_event"); *((void **)&R.sync_event) = dlsym(dynlib_handle, "__tgt_rtl_sync_event"); *((void **)&R.destroy_event) =