diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h --- a/openmp/libomptarget/include/omptargetplugin.h +++ b/openmp/libomptarget/include/omptargetplugin.h @@ -69,8 +69,7 @@ int64_t Size); int32_t __tgt_rtl_data_submit_async(int32_t ID, void *TargetPtr, void *HostPtr, - int64_t Size, - __tgt_async_info *AsyncInfoPtr); + int64_t Size, __tgt_async_info *AsyncInfo); // Retrieve the data content from the target device using its address. In case // of success, return zero. Otherwise, return an error code. @@ -80,7 +79,7 @@ // Asynchronous version of __tgt_rtl_data_retrieve int32_t __tgt_rtl_data_retrieve_async(int32_t ID, void *HostPtr, void *TargetPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr); + __tgt_async_info *AsyncInfo); // Copy the data content from one target device to another target device using // its address. This operation does not need to copy data back to host and then @@ -92,7 +91,7 @@ // Asynchronous version of __tgt_rtl_data_exchange int32_t __tgt_rtl_data_exchange_async(int32_t SrcID, void *SrcPtr, int32_t DesID, void *DstPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr); + __tgt_async_info *AsyncInfo); // De-allocate the data referenced by target ptr on the device. In case of // success, return zero. Otherwise, return an error code. @@ -101,8 +100,8 @@ // Transfer control to the offloaded entry Entry on the target device. // Args and Offsets are arrays of NumArgs size of target addresses and // offsets. An offset should be added to the target address before passing it -// to the outlined function on device side. If AsyncInfoPtr is nullptr, it is -// synchronous; otherwise it is asynchronous. However, AsyncInfoPtr may be +// to the outlined function on device side. If AsyncInfo is nullptr, it is +// synchronous; otherwise it is asynchronous. However, AsyncInfo may be // ignored on some platforms, like x86_64. In that case, it is synchronous. In // case of success, return zero. 
Otherwise, return an error code. int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args, @@ -111,12 +110,12 @@ // Asynchronous version of __tgt_rtl_run_target_region int32_t __tgt_rtl_run_target_region_async(int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs, - __tgt_async_info *AsyncInfoPtr); + __tgt_async_info *AsyncInfo); // Similar to __tgt_rtl_run_target_region, but additionally specify the // number of teams to be created and a number of threads in each team. If -// AsyncInfoPtr is nullptr, it is synchronous; otherwise it is asynchronous. -// However, AsyncInfoPtr may be ignored on some platforms, like x86_64. In that +// AsyncInfo is nullptr, it is synchronous; otherwise it is asynchronous. +// However, AsyncInfo may be ignored on some platforms, like x86_64. In that // case, it is synchronous. int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs, @@ -127,11 +126,11 @@ int32_t __tgt_rtl_run_target_team_region_async( int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs, int32_t NumTeams, int32_t ThreadLimit, uint64_t loop_tripcount, - __tgt_async_info *AsyncInfoPtr); + __tgt_async_info *AsyncInfo); // Device synchronization. In case of success, return zero. Otherwise, return an // error code. 
-int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfoPtr); +int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfo); #ifdef __cplusplus } diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -560,8 +560,8 @@ namespace { int32_t dataRetrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { - assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr"); + __tgt_async_info *AsyncInfo) { + assert(AsyncInfo && "AsyncInfo is nullptr"); assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large"); // Return success if we are not copying back to host from target. if (!HstPtr) @@ -587,8 +587,8 @@ } int32_t dataSubmit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { - assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr"); + __tgt_async_info *AsyncInfo) { + assert(AsyncInfo && "AsyncInfo is nullptr"); atmi_status_t err; assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large"); // Return success if we are not doing host to target. @@ -621,20 +621,20 @@ // there are no outstanding kernels that need to be synchronized. Any async call // may be passed a Queue==0, at which point the cuda implementation will set it // to non-null (see getStream). The cuda streams are per-device. Upstream may -// change this interface to explicitly initialize the async_info_pointer, but +// change this interface to explicitly initialize the AsyncInfo_pointer, but // until then hsa lazily initializes it as well. 
-void initAsyncInfoPtr(__tgt_async_info *async_info_ptr) { +void initAsyncInfo(__tgt_async_info *AsyncInfo) { // set non-null while using async calls, return to null to indicate completion - assert(async_info_ptr); - if (!async_info_ptr->Queue) { - async_info_ptr->Queue = reinterpret_cast(UINT64_MAX); + assert(AsyncInfo); + if (!AsyncInfo->Queue) { + AsyncInfo->Queue = reinterpret_cast(UINT64_MAX); } } -void finiAsyncInfoPtr(__tgt_async_info *async_info_ptr) { - assert(async_info_ptr); - assert(async_info_ptr->Queue); - async_info_ptr->Queue = 0; +void finiAsyncInfo(__tgt_async_info *AsyncInfo) { + assert(AsyncInfo); + assert(AsyncInfo->Queue); + AsyncInfo->Queue = 0; } bool elf_machine_id_is_amdgcn(__tgt_device_image *image) { @@ -1500,21 +1500,20 @@ int32_t __tgt_rtl_data_submit(int device_id, void *tgt_ptr, void *hst_ptr, int64_t size) { assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large"); - __tgt_async_info async_info; - int32_t rc = dataSubmit(device_id, tgt_ptr, hst_ptr, size, &async_info); + __tgt_async_info AsyncInfo; + int32_t rc = dataSubmit(device_id, tgt_ptr, hst_ptr, size, &AsyncInfo); if (rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(device_id, &async_info); + return __tgt_rtl_synchronize(device_id, &AsyncInfo); } int32_t __tgt_rtl_data_submit_async(int device_id, void *tgt_ptr, void *hst_ptr, - int64_t size, - __tgt_async_info *async_info_ptr) { + int64_t size, __tgt_async_info *AsyncInfo) { assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large"); - if (async_info_ptr) { - initAsyncInfoPtr(async_info_ptr); - return dataSubmit(device_id, tgt_ptr, hst_ptr, size, async_info_ptr); + if (AsyncInfo) { + initAsyncInfo(AsyncInfo); + return dataSubmit(device_id, tgt_ptr, hst_ptr, size, AsyncInfo); } else { return __tgt_rtl_data_submit(device_id, tgt_ptr, hst_ptr, size); } @@ -1523,21 +1522,21 @@ int32_t __tgt_rtl_data_retrieve(int device_id, void *hst_ptr, void *tgt_ptr, int64_t size) { 
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large"); - __tgt_async_info async_info; - int32_t rc = dataRetrieve(device_id, hst_ptr, tgt_ptr, size, &async_info); + __tgt_async_info AsyncInfo; + int32_t rc = dataRetrieve(device_id, hst_ptr, tgt_ptr, size, &AsyncInfo); if (rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(device_id, &async_info); + return __tgt_rtl_synchronize(device_id, &AsyncInfo); } int32_t __tgt_rtl_data_retrieve_async(int device_id, void *hst_ptr, void *tgt_ptr, int64_t size, - __tgt_async_info *async_info_ptr) { - assert(async_info_ptr && "async_info is nullptr"); + __tgt_async_info *AsyncInfo) { + assert(AsyncInfo && "AsyncInfo is nullptr"); assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large"); - initAsyncInfoPtr(async_info_ptr); - return dataRetrieve(device_id, hst_ptr, tgt_ptr, size, async_info_ptr); + initAsyncInfo(AsyncInfo); + return dataRetrieve(device_id, hst_ptr, tgt_ptr, size, AsyncInfo); } int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr) { @@ -1918,9 +1917,9 @@ void *tgt_entry_ptr, void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num, - __tgt_async_info *async_info_ptr) { - assert(async_info_ptr && "async_info is nullptr"); - initAsyncInfoPtr(async_info_ptr); + __tgt_async_info *AsyncInfo) { + assert(AsyncInfo && "AsyncInfo is nullptr"); + initAsyncInfo(AsyncInfo); // use one team and one thread // fix thread num @@ -1931,15 +1930,14 @@ thread_limit, 0); } -int32_t __tgt_rtl_synchronize(int32_t device_id, - __tgt_async_info *async_info_ptr) { - assert(async_info_ptr && "async_info is nullptr"); +int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) { + assert(AsyncInfo && "AsyncInfo is nullptr"); - // Cuda asserts that async_info_ptr->Queue is non-null, but this invariant + // Cuda asserts that AsyncInfo->Queue is non-null, but this invariant // is not ensured by devices.cpp for amdgcn - // assert(async_info_ptr->Queue && 
"async_info_ptr->Queue is nullptr"); - if (async_info_ptr->Queue) { - finiAsyncInfoPtr(async_info_ptr); + // assert(AsyncInfo->Queue && "AsyncInfo->Queue is nullptr"); + if (AsyncInfo->Queue) { + finiAsyncInfo(AsyncInfo); } return OFFLOAD_SUCCESS; } diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -380,13 +380,13 @@ E.Table.EntriesBegin = E.Table.EntriesEnd = nullptr; } - CUstream getStream(const int DeviceId, __tgt_async_info *AsyncInfoPtr) const { - assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr"); + CUstream getStream(const int DeviceId, __tgt_async_info *AsyncInfo) const { + assert(AsyncInfo && "AsyncInfo is nullptr"); - if (!AsyncInfoPtr->Queue) - AsyncInfoPtr->Queue = StreamManager->getStream(DeviceId); + if (!AsyncInfo->Queue) + AsyncInfo->Queue = StreamManager->getStream(DeviceId); - return reinterpret_cast(AsyncInfoPtr->Queue); + return reinterpret_cast(AsyncInfo->Queue); } public: @@ -812,14 +812,14 @@ } int dataSubmit(const int DeviceId, const void *TgtPtr, const void *HstPtr, - const int64_t Size, __tgt_async_info *AsyncInfoPtr) const { - assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr"); + const int64_t Size, __tgt_async_info *AsyncInfo) const { + assert(AsyncInfo && "AsyncInfo is nullptr"); CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n")) return OFFLOAD_FAIL; - CUstream Stream = getStream(DeviceId, AsyncInfoPtr); + CUstream Stream = getStream(DeviceId, AsyncInfo); Err = cuMemcpyHtoDAsync((CUdeviceptr)TgtPtr, HstPtr, Size, Stream); if (Err != CUDA_SUCCESS) { @@ -834,14 +834,14 @@ } int dataRetrieve(const int DeviceId, void *HstPtr, const void *TgtPtr, - const int64_t Size, __tgt_async_info *AsyncInfoPtr) const { - assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr"); + const int64_t Size, __tgt_async_info *AsyncInfo) 
const { + assert(AsyncInfo && "AsyncInfo is nullptr"); CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n")) return OFFLOAD_FAIL; - CUstream Stream = getStream(DeviceId, AsyncInfoPtr); + CUstream Stream = getStream(DeviceId, AsyncInfo); Err = cuMemcpyDtoHAsync(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream); if (Err != CUDA_SUCCESS) { @@ -856,14 +856,14 @@ } int dataExchange(int SrcDevId, const void *SrcPtr, int DstDevId, void *DstPtr, - int64_t Size, __tgt_async_info *AsyncInfoPtr) const { - assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr"); + int64_t Size, __tgt_async_info *AsyncInfo) const { + assert(AsyncInfo && "AsyncInfo is nullptr"); CUresult Err = cuCtxSetCurrent(DeviceData[SrcDevId].Context); if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n")) return OFFLOAD_FAIL; - CUstream Stream = getStream(SrcDevId, AsyncInfoPtr); + CUstream Stream = getStream(SrcDevId, AsyncInfo); // If they are two devices, we try peer to peer copy first if (SrcDevId != DstDevId) { @@ -1032,23 +1032,23 @@ return OFFLOAD_SUCCESS; } - int synchronize(const int DeviceId, __tgt_async_info *AsyncInfoPtr) const { - CUstream Stream = reinterpret_cast(AsyncInfoPtr->Queue); + int synchronize(const int DeviceId, __tgt_async_info *AsyncInfo) const { + CUstream Stream = reinterpret_cast(AsyncInfo->Queue); CUresult Err = cuStreamSynchronize(Stream); if (Err != CUDA_SUCCESS) { REPORT("Error when synchronizing stream. stream = " DPxMOD ", async info ptr = " DPxMOD "\n", - DPxPTR(Stream), DPxPTR(AsyncInfoPtr)); + DPxPTR(Stream), DPxPTR(AsyncInfo)); CUDA_ERR_STRING(Err); return OFFLOAD_FAIL; } // Once the stream is synchronized, return it to stream pool and reset - // async_info. This is to make sure the synchronization only works for its + // AsyncInfo. This is to make sure the synchronization only works for its // own tasks. 
- StreamManager->returnStream( - DeviceId, reinterpret_cast(AsyncInfoPtr->Queue)); - AsyncInfoPtr->Queue = nullptr; + StreamManager->returnStream(DeviceId, + reinterpret_cast(AsyncInfo->Queue)); + AsyncInfo->Queue = nullptr; return OFFLOAD_SUCCESS; } @@ -1105,58 +1105,56 @@ int64_t size) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - __tgt_async_info async_info; + __tgt_async_info AsyncInfo; const int32_t rc = __tgt_rtl_data_submit_async(device_id, tgt_ptr, hst_ptr, - size, &async_info); + size, &AsyncInfo); if (rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(device_id, &async_info); + return __tgt_rtl_synchronize(device_id, &AsyncInfo); } int32_t __tgt_rtl_data_submit_async(int32_t device_id, void *tgt_ptr, void *hst_ptr, int64_t size, - __tgt_async_info *async_info_ptr) { + __tgt_async_info *AsyncInfo) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - assert(async_info_ptr && "async_info_ptr is nullptr"); + assert(AsyncInfo && "AsyncInfo is nullptr"); - return DeviceRTL.dataSubmit(device_id, tgt_ptr, hst_ptr, size, - async_info_ptr); + return DeviceRTL.dataSubmit(device_id, tgt_ptr, hst_ptr, size, AsyncInfo); } int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr, int64_t size) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - __tgt_async_info async_info; + __tgt_async_info AsyncInfo; const int32_t rc = __tgt_rtl_data_retrieve_async(device_id, hst_ptr, tgt_ptr, - size, &async_info); + size, &AsyncInfo); if (rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(device_id, &async_info); + return __tgt_rtl_synchronize(device_id, &AsyncInfo); } int32_t __tgt_rtl_data_retrieve_async(int32_t device_id, void *hst_ptr, void *tgt_ptr, int64_t size, - __tgt_async_info *async_info_ptr) { + __tgt_async_info *AsyncInfo) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - assert(async_info_ptr && 
"async_info_ptr is nullptr"); + assert(AsyncInfo && "AsyncInfo is nullptr"); - return DeviceRTL.dataRetrieve(device_id, hst_ptr, tgt_ptr, size, - async_info_ptr); + return DeviceRTL.dataRetrieve(device_id, hst_ptr, tgt_ptr, size, AsyncInfo); } int32_t __tgt_rtl_data_exchange_async(int32_t src_dev_id, void *src_ptr, int dst_dev_id, void *dst_ptr, int64_t size, - __tgt_async_info *async_info_ptr) { + __tgt_async_info *AsyncInfo) { assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid"); assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid"); - assert(async_info_ptr && "async_info_ptr is nullptr"); + assert(AsyncInfo && "AsyncInfo is nullptr"); return DeviceRTL.dataExchange(src_dev_id, src_ptr, dst_dev_id, dst_ptr, size, - async_info_ptr); + AsyncInfo); } int32_t __tgt_rtl_data_exchange(int32_t src_dev_id, void *src_ptr, @@ -1165,13 +1163,13 @@ assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid"); assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid"); - __tgt_async_info async_info; + __tgt_async_info AsyncInfo; const int32_t rc = __tgt_rtl_data_exchange_async( - src_dev_id, src_ptr, dst_dev_id, dst_ptr, size, &async_info); + src_dev_id, src_ptr, dst_dev_id, dst_ptr, size, &AsyncInfo); if (rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(src_dev_id, &async_info); + return __tgt_rtl_synchronize(src_dev_id, &AsyncInfo); } int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) { @@ -1188,26 +1186,26 @@ uint64_t loop_tripcount) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - __tgt_async_info async_info; + __tgt_async_info AsyncInfo; const int32_t rc = __tgt_rtl_run_target_team_region_async( device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num, - thread_limit, loop_tripcount, &async_info); + thread_limit, loop_tripcount, &AsyncInfo); if (rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(device_id, 
&async_info); + return __tgt_rtl_synchronize(device_id, &AsyncInfo); } int32_t __tgt_rtl_run_target_team_region_async( int32_t device_id, void *tgt_entry_ptr, void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num, int32_t thread_limit, uint64_t loop_tripcount, - __tgt_async_info *async_info_ptr) { + __tgt_async_info *AsyncInfo) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - return DeviceRTL.runTargetTeamRegion( - device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num, - thread_limit, loop_tripcount, async_info_ptr); + return DeviceRTL.runTargetTeamRegion(device_id, tgt_entry_ptr, tgt_args, + tgt_offsets, arg_num, team_num, + thread_limit, loop_tripcount, AsyncInfo); } int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr, @@ -1215,35 +1213,33 @@ int32_t arg_num) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - __tgt_async_info async_info; + __tgt_async_info AsyncInfo; const int32_t rc = __tgt_rtl_run_target_region_async( - device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, &async_info); + device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, &AsyncInfo); if (rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(device_id, &async_info); + return __tgt_rtl_synchronize(device_id, &AsyncInfo); } int32_t __tgt_rtl_run_target_region_async(int32_t device_id, void *tgt_entry_ptr, void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num, - __tgt_async_info *async_info_ptr) { + __tgt_async_info *AsyncInfo) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); return __tgt_rtl_run_target_team_region_async( device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, - /* team num*/ 1, /* thread_limit */ 1, /* loop_tripcount */ 0, - async_info_ptr); + /* team num*/ 1, /* thread_limit */ 1, /* loop_tripcount */ 0, AsyncInfo); } -int32_t __tgt_rtl_synchronize(int32_t device_id, - __tgt_async_info *async_info_ptr) { 
+int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - assert(async_info_ptr && "async_info_ptr is nullptr"); - assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr"); + assert(AsyncInfo && "AsyncInfo is nullptr"); + assert(AsyncInfo->Queue && "AsyncInfo->Queue is nullptr"); - return DeviceRTL.synchronize(device_id, async_info_ptr); + return DeviceRTL.synchronize(device_id, AsyncInfo); } #ifdef __cplusplus diff --git a/openmp/libomptarget/plugins/remote/server/Server.cpp b/openmp/libomptarget/plugins/remote/server/Server.cpp --- a/openmp/libomptarget/plugins/remote/server/Server.cpp +++ b/openmp/libomptarget/plugins/remote/server/Server.cpp @@ -160,11 +160,11 @@ SERVER_DBG("Synchronizing device %d (probably won't work)", Info->device_id()); - void *AsyncInfoPtr = (void *)Info->queue_ptr(); + void *AsyncInfo = (void *)Info->queue_ptr(); Reply->set_number(0); if (PM->Devices[Info->device_id()].RTL->synchronize) Reply->set_number(PM->Devices[Info->device_id()].synchronize( - (__tgt_async_info *)AsyncInfoPtr)); + (__tgt_async_info *)AsyncInfo)); SERVER_DBG("Synchronized device %d", Info->device_id()); return Status::OK; diff --git a/openmp/libomptarget/plugins/remote/src/Client.h b/openmp/libomptarget/plugins/remote/src/Client.h --- a/openmp/libomptarget/plugins/remote/src/Client.h +++ b/openmp/libomptarget/plugins/remote/src/Client.h @@ -77,30 +77,30 @@ int32_t initRequires(int64_t RequiresFlags); __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image); - int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr); + int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo); int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId); void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr); int32_t dataDelete(int32_t DeviceId, void *TgtPtr); int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr, 
- int64_t Size, __tgt_async_info *AsyncInfoPtr); + int64_t Size, __tgt_async_info *AsyncInfo); int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr, - int64_t Size, __tgt_async_info *AsyncInfoPtr); + int64_t Size, __tgt_async_info *AsyncInfo); int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, void *DstPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr); + __tgt_async_info *AsyncInfo); int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, - int32_t ArgNum, __tgt_async_info *AsyncInfoPtr); + int32_t ArgNum, __tgt_async_info *AsyncInfo); int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, uint64_t LoopTripCount, - __tgt_async_info *AsyncInfoPtr); + __tgt_async_info *AsyncInfo); }; class RemoteClientManager { @@ -138,30 +138,30 @@ int32_t initRequires(int64_t RequiresFlags); __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image); - int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr); + int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo); int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId); void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr); int32_t dataDelete(int32_t DeviceId, void *TgtPtr); int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr, - int64_t Size, __tgt_async_info *AsyncInfoPtr); + int64_t Size, __tgt_async_info *AsyncInfo); int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr, - int64_t Size, __tgt_async_info *AsyncInfoPtr); + int64_t Size, __tgt_async_info *AsyncInfo); int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, void *DstPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr); + __tgt_async_info *AsyncInfo); int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, - 
int32_t ArgNum, __tgt_async_info *AsyncInfoPtr); + int32_t ArgNum, __tgt_async_info *AsyncInfo); int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, uint64_t LoopTripCount, - __tgt_async_info *AsyncInfoPtr); + __tgt_async_info *AsyncInfo); }; #endif diff --git a/openmp/libomptarget/plugins/remote/src/Client.cpp b/openmp/libomptarget/plugins/remote/src/Client.cpp --- a/openmp/libomptarget/plugins/remote/src/Client.cpp +++ b/openmp/libomptarget/plugins/remote/src/Client.cpp @@ -251,7 +251,7 @@ } int64_t RemoteOffloadClient::synchronize(int32_t DeviceId, - __tgt_async_info *AsyncInfoPtr) { + __tgt_async_info *AsyncInfo) { return remoteCall( /* Preprocess */ [&](auto &RPCStatus, auto &Context) { @@ -260,7 +260,7 @@ protobuf::Arena::CreateMessage(Arena.get()); Info->set_device_id(DeviceId); - Info->set_queue_ptr((uint64_t)AsyncInfoPtr); + Info->set_queue_ptr((uint64_t)AsyncInfo); CLIENT_DBG("Synchronizing device %d", DeviceId); RPCStatus = Stub->Synchronize(&Context, *Info, Reply); @@ -339,7 +339,7 @@ int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { + __tgt_async_info *AsyncInfo) { return remoteCall( /* Preprocess */ @@ -360,7 +360,7 @@ Request->set_tgt_ptr((uint64_t)TgtPtr); Request->set_start(Start); Request->set_size(Size); - Request->set_queue_ptr((uint64_t)AsyncInfoPtr); + Request->set_queue_ptr((uint64_t)AsyncInfo); CLIENT_DBG("Submitting %ld-%ld/%ld bytes async on device %d at %p", Start, End, Size, DeviceId, TgtPtr) @@ -418,7 +418,7 @@ int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { + __tgt_async_info *AsyncInfo) { return remoteCall( /* Preprocess */ [&](auto &RPCStatus, auto &Context) { @@ -429,7 +429,7 @@ Request->set_size(Size); Request->set_hst_ptr((int64_t)HstPtr); 
Request->set_tgt_ptr((int64_t)TgtPtr); - Request->set_queue_ptr((uint64_t)AsyncInfoPtr); + Request->set_queue_ptr((uint64_t)AsyncInfo); auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); std::unique_ptr> Reader( @@ -481,7 +481,7 @@ int32_t RemoteOffloadClient::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, void *DstPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { + __tgt_async_info *AsyncInfo) { return remoteCall( /* Preprocess */ [&](auto &RPCStatus, auto &Context) { @@ -494,7 +494,7 @@ Request->set_dst_dev_id(DstDevId); Request->set_dst_ptr((uint64_t)DstPtr); Request->set_size(Size); - Request->set_queue_ptr((uint64_t)AsyncInfoPtr); + Request->set_queue_ptr((uint64_t)AsyncInfo); CLIENT_DBG( "Exchanging %ld bytes on device %d at %p for %p on device %d", Size, @@ -547,7 +547,7 @@ int32_t RemoteOffloadClient::runTargetRegionAsync( int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, - int32_t ArgNum, __tgt_async_info *AsyncInfoPtr) { + int32_t ArgNum, __tgt_async_info *AsyncInfo) { return remoteCall( /* Preprocess */ [&](auto &RPCStatus, auto &Context) { @@ -556,7 +556,7 @@ protobuf::Arena::CreateMessage(Arena.get()); Request->set_device_id(DeviceId); - Request->set_queue_ptr((uint64_t)AsyncInfoPtr); + Request->set_queue_ptr((uint64_t)AsyncInfo); Request->set_tgt_entry_ptr( (uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]); @@ -592,7 +592,7 @@ int32_t RemoteOffloadClient::runTargetTeamRegionAsync( int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, - uint64_t LoopTripcount, __tgt_async_info *AsyncInfoPtr) { + uint64_t LoopTripcount, __tgt_async_info *AsyncInfo) { return remoteCall( /* Preprocess */ [&](auto &RPCStatus, auto &Context) { @@ -601,7 +601,7 @@ protobuf::Arena::CreateMessage(Arena.get()); Request->set_device_id(DeviceId); - Request->set_queue_ptr((uint64_t)AsyncInfoPtr); + Request->set_queue_ptr((uint64_t)AsyncInfo); 
Request->set_tgt_entry_ptr( (uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]); @@ -712,10 +712,10 @@ } int64_t RemoteClientManager::synchronize(int32_t DeviceId, - __tgt_async_info *AsyncInfoPtr) { + __tgt_async_info *AsyncInfo) { int32_t ClientIdx, DeviceIdx; std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); - return Clients[ClientIdx].synchronize(DeviceIdx, AsyncInfoPtr); + return Clients[ClientIdx].synchronize(DeviceIdx, AsyncInfo); } int32_t RemoteClientManager::isDataExchangeable(int32_t SrcDevId, @@ -741,49 +741,49 @@ int32_t RemoteClientManager::dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { + __tgt_async_info *AsyncInfo) { int32_t ClientIdx, DeviceIdx; std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); return Clients[ClientIdx].dataSubmitAsync(DeviceIdx, TgtPtr, HstPtr, Size, - AsyncInfoPtr); + AsyncInfo); } int32_t RemoteClientManager::dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { + __tgt_async_info *AsyncInfo) { int32_t ClientIdx, DeviceIdx; std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); return Clients[ClientIdx].dataRetrieveAsync(DeviceIdx, HstPtr, TgtPtr, Size, - AsyncInfoPtr); + AsyncInfo); } int32_t RemoteClientManager::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, void *DstPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { + __tgt_async_info *AsyncInfo) { int32_t SrcClientIdx, SrcDeviceIdx, DstClientIdx, DstDeviceIdx; std::tie(SrcClientIdx, SrcDeviceIdx) = mapDeviceId(SrcDevId); std::tie(DstClientIdx, DstDeviceIdx) = mapDeviceId(DstDevId); return Clients[SrcClientIdx].dataExchangeAsync( - SrcDeviceIdx, SrcPtr, DstDeviceIdx, DstPtr, Size, AsyncInfoPtr); + SrcDeviceIdx, SrcPtr, DstDeviceIdx, DstPtr, Size, AsyncInfo); } int32_t RemoteClientManager::runTargetRegionAsync( int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, - int32_t ArgNum, __tgt_async_info 
*AsyncInfoPtr) { + int32_t ArgNum, __tgt_async_info *AsyncInfo) { int32_t ClientIdx, DeviceIdx; std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); return Clients[ClientIdx].runTargetRegionAsync( - DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, AsyncInfoPtr); + DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, AsyncInfo); } int32_t RemoteClientManager::runTargetTeamRegionAsync( int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, - uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) { + uint64_t LoopTripCount, __tgt_async_info *AsyncInfo) { int32_t ClientIdx, DeviceIdx; std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); return Clients[ClientIdx].runTargetTeamRegionAsync( DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit, - LoopTripCount, AsyncInfoPtr); + LoopTripCount, AsyncInfo); } diff --git a/openmp/libomptarget/plugins/remote/src/rtl.cpp b/openmp/libomptarget/plugins/remote/src/rtl.cpp --- a/openmp/libomptarget/plugins/remote/src/rtl.cpp +++ b/openmp/libomptarget/plugins/remote/src/rtl.cpp @@ -76,9 +76,8 @@ return Manager->loadBinary(DeviceId, (__tgt_device_image *)Image); } -int32_t __tgt_rtl_synchronize(int32_t DeviceId, - __tgt_async_info *AsyncInfoPtr) { - return Manager->synchronize(DeviceId, AsyncInfoPtr); +int32_t __tgt_rtl_synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo) { + return Manager->synchronize(DeviceId, AsyncInfo); } int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId) { @@ -96,8 +95,8 @@ int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { - return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, AsyncInfoPtr); + __tgt_async_info *AsyncInfo) { + return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, AsyncInfo); } int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, @@ 
-107,9 +106,8 @@ int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { - return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size, - AsyncInfoPtr); + __tgt_async_info *AsyncInfo) { + return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size, AsyncInfo); } int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) { @@ -125,9 +123,9 @@ int32_t __tgt_rtl_data_exchange_async(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, void *DstPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { + __tgt_async_info *AsyncInfo) { return Manager->dataExchangeAsync(SrcDevId, SrcPtr, DstDevId, DstPtr, Size, - AsyncInfoPtr); + AsyncInfo); } int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr, @@ -140,9 +138,9 @@ int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t ArgNum, - __tgt_async_info *AsyncInfoPtr) { + __tgt_async_info *AsyncInfo) { return Manager->runTargetRegionAsync(DeviceId, TgtEntryPtr, TgtArgs, - TgtOffsets, ArgNum, AsyncInfoPtr); + TgtOffsets, ArgNum, AsyncInfo); } int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr, @@ -158,10 +156,10 @@ int32_t __tgt_rtl_run_target_team_region_async( int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, - uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) { + uint64_t LoopTripCount, __tgt_async_info *AsyncInfo) { return Manager->runTargetTeamRegionAsync( DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit, - LoopTripCount, AsyncInfoPtr); + LoopTripCount, AsyncInfo); } // Exposed library API function diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -196,28 +196,28 @@ /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when 
succeeds/fails. int32_t deleteData(void *TgtPtrBegin); - // Data transfer. When AsyncInfoPtr is nullptr, the transfer will be + // Data transfer. When AsyncInfo is nullptr, the transfer will be // synchronous. // Copy data from host to device int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, - __tgt_async_info *AsyncInfoPtr); + __tgt_async_info *AsyncInfo); // Copy data from device back to host int32_t retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size, - __tgt_async_info *AsyncInfoPtr); + __tgt_async_info *AsyncInfo); // Copy data from current device to destination device directly int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, int64_t Size, __tgt_async_info *AsyncInfo); int32_t runRegion(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets, - int32_t TgtVarsSize, __tgt_async_info *AsyncInfoPtr); + int32_t TgtVarsSize, __tgt_async_info *AsyncInfo); int32_t runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, int32_t NumTeams, int32_t ThreadLimit, - uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr); + uint64_t LoopTripCount, __tgt_async_info *AsyncInfo); - /// Synchronize device/queue/event based on \p AsyncInfoPtr and return + /// Synchronize device/queue/event based on \p AsyncInfo and return /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails. 
- int32_t synchronize(__tgt_async_info *AsyncInfoPtr); + int32_t synchronize(__tgt_async_info *AsyncInfo); private: // Call to RTL diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -415,22 +415,22 @@ // Submit data to device int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { - if (!AsyncInfoPtr || !RTL->data_submit_async || !RTL->synchronize) + __tgt_async_info *AsyncInfo) { + if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); else return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, - AsyncInfoPtr); + AsyncInfo); } // Retrieve data from device int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, - int64_t Size, __tgt_async_info *AsyncInfoPtr) { - if (!AsyncInfoPtr || !RTL->data_retrieve_async || !RTL->synchronize) + int64_t Size, __tgt_async_info *AsyncInfo) { + if (!AsyncInfo || !RTL->data_retrieve_async || !RTL->synchronize) return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); else return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, - AsyncInfoPtr); + AsyncInfo); } // Copy data from current device to destination device directly @@ -448,13 +448,13 @@ // Run region on device int32_t DeviceTy::runRegion(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, - __tgt_async_info *AsyncInfoPtr) { - if (!AsyncInfoPtr || !RTL->run_region || !RTL->synchronize) + __tgt_async_info *AsyncInfo) { + if (!AsyncInfo || !RTL->run_region_async || !RTL->synchronize) return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, TgtVarsSize); else return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, - TgtOffsets, TgtVarsSize, AsyncInfoPtr); + TgtOffsets, TgtVarsSize, AsyncInfo); } // Run team
region on device. @@ -462,15 +462,15 @@ ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, int32_t NumTeams, int32_t ThreadLimit, uint64_t LoopTripCount, - __tgt_async_info *AsyncInfoPtr) { - if (!AsyncInfoPtr || !RTL->run_team_region_async || !RTL->synchronize) + __tgt_async_info *AsyncInfo) { + if (!AsyncInfo || !RTL->run_team_region_async || !RTL->synchronize) return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit, LoopTripCount); else return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, TgtVarsSize, NumTeams, - ThreadLimit, LoopTripCount, AsyncInfoPtr); + ThreadLimit, LoopTripCount, AsyncInfo); } // Whether data can be copied to DstDevice directly @@ -485,9 +485,9 @@ return false; } -int32_t DeviceTy::synchronize(__tgt_async_info *AsyncInfoPtr) { +int32_t DeviceTy::synchronize(__tgt_async_info *AsyncInfo) { if (RTL->synchronize) - return RTL->synchronize(RTLDeviceID, AsyncInfoPtr); + return RTL->synchronize(RTLDeviceID, AsyncInfo); return OFFLOAD_SUCCESS; }