Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -427,7 +427,7 @@ VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) __OMP_RTL(__tgt_target_data_begin_mapper_issue, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, AsyncInfoPtr) -__OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfoPtr) +__OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, IdentPtr, Int64, AsyncInfoPtr) __OMP_RTL(__tgt_target_data_end_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) __OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, IdentPtr, Int64, Int32, Index: openmp/libomptarget/include/omptarget.h =================================================================== --- openmp/libomptarget/include/omptarget.h +++ openmp/libomptarget/include/omptarget.h @@ -187,10 +187,17 @@ __tgt_async_info AsyncInfo; DeviceTy &Device; + // If the flag is set, synchronize() is called in the destructor. + bool ShouldSync; public: - AsyncInfoTy(DeviceTy &Device) : Device(Device) {} - ~AsyncInfoTy() { synchronize(); } + AsyncInfoTy(DeviceTy &Device, bool ShouldSync = true) + : Device(Device), ShouldSync(ShouldSync) {} + + ~AsyncInfoTy() { + if (ShouldSync) + synchronize(); + } /// Implicit conversion to the __tgt_async_info which is used in the /// plugin interface. @@ -278,6 +285,17 @@ void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers); +// Issues the memory transfer asynchronously and sets a handle. 
+void __tgt_target_data_begin_mapper_issue( + ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, + void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, + void **ArgMappers, __tgt_async_info *Handle); + +// Waits on the handle set by "__tgt_target_data_begin_mapper_issue" +// for the memory transfer to finish. +void __tgt_target_data_begin_mapper_wait(ident_t *Loc, int64_t DeviceId, + __tgt_async_info *Handle); + void __tgt_target_data_begin_nowait_mapper( ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, Index: openmp/libomptarget/src/exports =================================================================== --- openmp/libomptarget/src/exports +++ openmp/libomptarget/src/exports @@ -15,6 +15,8 @@ __tgt_target_nowait; __tgt_target_teams_nowait; __tgt_target_data_begin_mapper; + __tgt_target_data_begin_mapper_issue; + __tgt_target_data_begin_mapper_wait; __tgt_target_data_end_mapper; __tgt_target_data_update_mapper; __tgt_target_mapper; Index: openmp/libomptarget/src/interface.cpp =================================================================== --- openmp/libomptarget/src/interface.cpp +++ openmp/libomptarget/src/interface.cpp @@ -70,6 +70,15 @@ int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers) { + __tgt_target_data_begin_mapper_issue(Loc, DeviceId, ArgNum, ArgsBase, Args, + ArgSizes, ArgTypes, ArgNames, ArgMappers, + nullptr); +} + +EXTERN void __tgt_target_data_begin_mapper_issue( + ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, + void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, + void **ArgMappers, __tgt_async_info *Handle) { TIMESCOPE_WITH_IDENT(Loc); DP("Entering data begin region for device %" PRId64 " with %d mappings\n", DeviceId, ArgNum); @@ -92,14 +101,39 @@ } #endif - AsyncInfoTy AsyncInfo(Device); + AsyncInfoTy AsyncInfo(Device, !(Handle)); int Rc = 
targetDataBegin(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo); - if (Rc == OFFLOAD_SUCCESS) - Rc = AsyncInfo.synchronize(); + + if (Rc == OFFLOAD_SUCCESS) { + if (Handle) + Handle->Queue = ((__tgt_async_info *)AsyncInfo)->Queue; + else + Rc = AsyncInfo.synchronize(); + } + handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); } +EXTERN void __tgt_target_data_begin_mapper_wait(ident_t *Loc, int64_t DeviceId, + __tgt_async_info *Handle) { + + TIMESCOPE_WITH_IDENT(Loc); + assert((Handle && Handle->Queue) && "Incomplete data mapping"); + + DP("Entering wait function in data begin region for device %" PRId64 "\n", + DeviceId); + + if (checkDevice(DeviceId, Loc)) { + DP("Not offloading to device %" PRId64 "\n", DeviceId); + return; + } + + DeviceTy &Device = *PM->Devices[DeviceId]; + if (Device.RTL->synchronize) + Device.RTL->synchronize(DeviceId, Handle); +} + EXTERN void __tgt_target_data_begin_nowait_mapper( ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, Index: openmp/libomptarget/src/omptarget.cpp =================================================================== --- openmp/libomptarget/src/omptarget.cpp +++ openmp/libomptarget/src/omptarget.cpp @@ -273,19 +273,8 @@ return PM->TargetOffloadPolicy == tgt_disabled; } -// If offload is enabled, ensure that device DeviceID has been initialized, -// global ctors have been executed, and global data has been mapped. -// -// The return bool indicates if the offload is to the host device -// There are three possible results: -// - Return false if the taregt device is ready for offload -// - Return true without reporting a runtime error if offload is -// disabled, perhaps because the initial device was specified. -// - Report a runtime error and return true. -// -// If DeviceID == OFFLOAD_DEVICE_DEFAULT, set DeviceID to the default device. -// This step might be skipped if offload is disabled. 
-bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc) { +// If offload is enabled, ensure that device DeviceID has been initialized. +bool checkDevice(int64_t &DeviceID, ident_t *Loc) { if (isOffloadDisabled()) { DP("Offload is disabled\n"); return true; @@ -316,6 +305,26 @@ return true; } + return false; +} + +// If offload is enabled, ensure that device DeviceID has been initialized, +// global ctors have been executed, and global data has been mapped. +// +// The return bool indicates if the offload is to the host device. +// There are three possible results: +// - Return false if the target device is ready for offload +// - Return true without reporting a runtime error if offload is +// disabled, perhaps because the initial device was specified. +// - Report a runtime error and return true. +// +// If DeviceID == OFFLOAD_DEVICE_DEFAULT, set DeviceID to the default device. +// This step might be skipped if offload is disabled. +bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc) { + + if(checkDevice(DeviceID, Loc)) + return true; + // Get device info. DeviceTy &Device = *PM->Devices[DeviceID]; Index: openmp/libomptarget/src/private.h =================================================================== --- openmp/libomptarget/src/private.h +++ openmp/libomptarget/src/private.h @@ -46,6 +46,7 @@ AsyncInfoTy &AsyncInfo); extern void handleTargetOutcome(bool Success, ident_t *Loc); +extern bool checkDevice(int64_t &DeviceID, ident_t *Loc); extern bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc); extern void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, const char *Name);