diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -171,6 +171,21 @@
     int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum,
     void *depList, int32_t noAliasDepNum, void *noAliasDepList);
 
+// Asynchronous version of __tgt_target_data_begin_mapper that puts the
+// information of the asynchronous transfer in \p handle. This handle can be
+// used later by __tgt_target_data_begin_mapper_wait to wait/synchronize until
+// the transfer is complete. If \p handle is nullptr the call behaves like the
+// synchronous __tgt_target_data_begin_mapper.
+void __tgt_target_data_begin_mapper_issue(
+    int64_t device_id, int32_t arg_num, void **args_base, void **args,
+    int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers,
+    __tgt_async_info *handle);
+
+// Waits until the transfer recorded in \p handle by
+// __tgt_target_data_begin_mapper_issue is complete.
+void __tgt_target_data_begin_mapper_wait(int64_t device_id,
+                                         __tgt_async_info *handle);
+
 // passes data from the target, release target memory and destroys the
 // host-target mapping (top entry from the stack of data maps) created by
 // the last __tgt_target_data_begin
diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -114,10 +114,34 @@
 EXTERN void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
     void **arg_mappers) {
+  __tgt_target_data_begin_mapper_issue(device_id, arg_num, args_base, args,
+                                       arg_sizes, arg_types, arg_mappers,
+                                       nullptr);
+}
+
+EXTERN void __tgt_target_data_begin_nowait_mapper(int64_t device_id,
+    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+    int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList,
+    int32_t noAliasDepNum, void *noAliasDepList) {
+  if (depNum + noAliasDepNum > 0)
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+  __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args,
+                                 arg_sizes, arg_types, arg_mappers);
+}
+
+/// Issues the data transfers of a data begin region and records them in
+/// \p handle so __tgt_target_data_begin_mapper_wait can wait on them later.
+/// The blocking __tgt_target_data_begin_mapper passes a nullptr \p handle.
+EXTERN void __tgt_target_data_begin_mapper_issue(
+    int64_t device_id, int32_t arg_num, void **args_base, void **args,
+    int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers,
+    __tgt_async_info *handle) {
+
   if (IsOffloadDisabled()) return;
 
   DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
-      device_id, arg_num);
+     device_id, arg_num);
 
   // No devices available?
   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
@@ -142,19 +166,33 @@
 #endif
 
   int rc = targetDataBegin(Device, arg_num, args_base, args, arg_sizes,
-                           arg_types, arg_mappers, nullptr);
+                           arg_types, arg_mappers, handle);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 }
 
-EXTERN void __tgt_target_data_begin_nowait_mapper(int64_t device_id,
-    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
-    int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList,
-    int32_t noAliasDepNum, void *noAliasDepList) {
-  if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+/// Waits on \p handle for the transfer issued by
+/// __tgt_target_data_begin_mapper_issue and reports the outcome.
+EXTERN void __tgt_target_data_begin_mapper_wait(int64_t device_id,
+                                                __tgt_async_info *handle) {
+  if (!handle || !handle->Queue) {
+    DP("No handle to wait on!\n");
+    HandleTargetOutcome(false);
+    return;
+  }
 
-  __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args,
-      arg_sizes, arg_types, arg_mappers);
+  // No devices available?
+  if (device_id == OFFLOAD_DEVICE_DEFAULT)
+    device_id = omp_get_default_device();
+
+  if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
+    DP("Failed to get device %" PRId64 " ready\n", device_id);
+    HandleTargetOutcome(false);
+    return;
+  }
+
+  DeviceTy &Device = Devices[device_id];
+  int rc = Device.synchronize(handle);
+  HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 }
 
 /// passes data from the target, releases target memory and destroys