Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -529,6 +529,11 @@
   /// Register the offload entries for a specific image on the device.
   Error registerOffloadEntries(DeviceImageTy &Image);
 
+  /// Synchronize with the pending operations on the given async info if the
+  /// 'LIBOMPTARGET_FORCE_SYNCHRONIZE' environment variable is set. A failed
+  /// synchronization is reported but not returned to the caller.
+  void checkForForceSynchronize(__tgt_async_info *AsyncInfo);
+
   /// Synchronize the current thread with the pending operations on the
   /// __tgt_async_info structure.
   Error synchronize(__tgt_async_info *AsyncInfo);
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -247,6 +247,20 @@
   return Plugin::success();
 }
 
+void GenericDeviceTy::checkForForceSynchronize(__tgt_async_info *AsyncInfo) {
+  // Read the environment once: this helper runs after every data transfer and
+  // kernel launch, so a per-call getenv() would sit on the hot path.
+  static const bool ForceSync =
+      std::getenv("LIBOMPTARGET_FORCE_SYNCHRONIZE") != nullptr;
+  // Without a queue there is nothing pending to wait for.
+  if (!ForceSync || !AsyncInfo || !AsyncInfo->Queue)
+    return;
+
+  // Capture the queue before synchronizing so the diagnostic names the stream
+  // that failed even if synchronize() resets the field (TODO confirm).
+  auto *Queue = AsyncInfo->Queue;
+  if (auto SyncErr = synchronize(AsyncInfo))
+    REPORT("Failure to synchronize stream %p: %s\n", Queue,
+           toString(std::move(SyncErr)).data());
+}
+
 Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
                               ptrdiff_t *ArgOffsets, KernelArgsTy &KernelArgs,
                               AsyncInfoWrapperTy &AsyncInfoWrapper) const {
@@ -926,6 +940,8 @@
   AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
 
   auto Err = dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper);
+  checkForForceSynchronize(AsyncInfo);
+
   AsyncInfoWrapper.finalize(Err);
   return Err;
 }
@@ -935,6 +951,8 @@
   AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
 
   auto Err = dataRetrieveImpl(HstPtr, TgtPtr, Size, AsyncInfoWrapper);
+  checkForForceSynchronize(AsyncInfo);
+
   AsyncInfoWrapper.finalize(Err);
   return Err;
 }
@@ -945,6 +963,8 @@
   AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
 
   auto Err = dataExchangeImpl(SrcPtr, DstDev, DstPtr, Size, AsyncInfoWrapper);
+  checkForForceSynchronize(AsyncInfo);
+
   AsyncInfoWrapper.finalize(Err);
   return Err;
 }
@@ -966,6 +986,7 @@
 
   auto Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, KernelArgs,
                                   AsyncInfoWrapper);
+  checkForForceSynchronize(AsyncInfo);
 
   if (RecordReplay.isRecordingOrReplaying() &&
       RecordReplay.isSaveOutputEnabled())