Index: openmp/libomptarget/include/omptarget.h =================================================================== --- openmp/libomptarget/include/omptarget.h +++ openmp/libomptarget/include/omptarget.h @@ -170,6 +170,11 @@ int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +/* Starts the data-begin mapping and returns the handle that the matching _wait call takes. */ __tgt_async_info *__tgt_target_data_begin_mapper_issue( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers); +/* NOTE(review): device_id is int32_t here but int64_t in the _issue declaration above -- confirm the narrower type is intended. */ void __tgt_target_data_begin_mapper_wait(int32_t device_id, + __tgt_async_info *handle); // passes data from the target, release target memory and destroys the // host-target mapping (top entry from the stack of data maps) created by Index: openmp/libomptarget/src/interface.cpp =================================================================== --- openmp/libomptarget/src/interface.cpp +++ openmp/libomptarget/src/interface.cpp @@ -111,10 +111,34 @@ EXTERN void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers) { - if (IsOffloadDisabled()) return; + + /* Synchronous entry point: issue the mapping, then wait on the handle it returns. NOTE(review): the handle is malloc'ed in the issue routine and no free is visible in this patch -- confirm who releases it. */ __tgt_async_info *Handle = + __tgt_target_data_begin_mapper_issue(device_id,arg_num, args_base, args, + arg_sizes, arg_types, arg_mappers); + __tgt_target_data_begin_mapper_wait(device_id, Handle); +} + +EXTERN void __tgt_target_data_begin_nowait_mapper(int64_t device_id, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList) { + /* When any dependences are passed, calls __kmpc_omp_taskwait first; then forwards to the synchronous mapper. */ if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); + + __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, arg_mappers); +} + +EXTERN __tgt_async_info 
*__tgt_target_data_begin_mapper_issue( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers) { + + /* NOTE(review): malloc does not initialize the struct and the result is not NULL-checked; the early returns below hand it back untouched, while the _wait routine reads handle->Queue. */ __tgt_async_info *Handle = + (__tgt_async_info *) malloc(sizeof(__tgt_async_info)); + if (IsOffloadDisabled()) return Handle; DP("Entering data begin region for device %" PRId64 " with %d mappings\n", - device_id, arg_num); + device_id, arg_num); // No devices available? if (device_id == OFFLOAD_DEVICE_DEFAULT) { @@ -125,7 +149,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %" PRId64 " ready\n", device_id); HandleTargetOutcome(false); - return; + return Handle; } DeviceTy &Device = Devices[device_id]; @@ -133,25 +157,32 @@ #ifdef OMPTARGET_DEBUG for (int i = 0; i < arg_num; ++i) { DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 - ", Type=0x%" PRIx64 "\n", + ", Type=0x%" PRIx64 "\n", i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i]); } #endif int rc = targetDataBegin(Device, arg_num, args_base, args, arg_sizes, - arg_types, arg_mappers, nullptr); + arg_types, arg_mappers, Handle); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); + + return Handle; } -EXTERN void __tgt_target_data_begin_nowait_mapper(int64_t device_id, - int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList, - int32_t noAliasDepNum, void *noAliasDepList) { - if (depNum + noAliasDepNum > 0) - __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); +EXTERN void __tgt_target_data_begin_mapper_wait(int32_t device_id, + __tgt_async_info *handle) { + /* With a null handle or a null Queue: log, call HandleTargetOutcome(false) and return. NOTE(review): this DP message has no trailing newline, unlike the other DP calls in this patch. */ if (!handle || !handle->Queue) { + DP("No handle to wait on!"); + HandleTargetOutcome(false); + return; + } - __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, - arg_sizes, arg_types, arg_mappers); + // No devices available? 
+ /* Map the OFFLOAD_DEVICE_DEFAULT sentinel to omp_get_default_device(). */ if (device_id == OFFLOAD_DEVICE_DEFAULT) + device_id = omp_get_default_device(); + + /* NOTE(review): no CheckDeviceAndCtors call precedes this lookup, unlike the _issue path -- confirm device_id is always a valid index here. */ DeviceTy &Device = Devices[device_id]; + Device.synchronize(handle); } /// passes data from the target, releases target memory and destroys