Index: openmp/docs/design/Runtimes.rst =================================================================== --- openmp/docs/design/Runtimes.rst +++ openmp/docs/design/Runtimes.rst @@ -1160,6 +1160,7 @@ * ``LIBOMPTARGET_AMDGPU_TEAMS_PER_CU`` * ``LIBOMPTARGET_AMDGPU_MAX_ASYNC_COPY_BYTES`` * ``LIBOMPTARGET_AMDGPU_NUM_INITIAL_HSA_SIGNALS`` +* ``LIBOMPTARGET_FORCE_SYNCHRONIZE`` The environment variables ``LIBOMPTARGET_SHARED_MEMORY_SIZE``, ``LIBOMPTARGET_STACK_SIZE`` and ``LIBOMPTARGET_HEAP_SIZE`` are described in @@ -1238,6 +1239,14 @@ streams. More HSA signals will be created dynamically throughout the execution if needed. The default value is ``64``. +LIBOMPTARGET_FORCE_SYNCHRONIZE +"""""""""""""""""""""""""""""" + +This environment variable causes the NextGen plugin to synchronize immediately +after each kernel launch and each data transfer, instead of deferring the +synchronization as it does by default. This makes it easier to identify the +operation that caused a crash. + .. _remote_offloading_plugin: Remote Offloading Plugin: Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp =================================================================== --- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -207,13 +207,16 @@ void AsyncInfoWrapperTy::finalize(Error &Err) { assert(AsyncInfoPtr && "AsyncInfoWrapperTy already finalized"); + static BoolEnvar ForceSynchronize("LIBOMPTARGET_FORCE_SYNCHRONIZE"); // If we used a local async info object we want synchronous behavior. In that // case, and assuming the current status code is correct, we will synchronize // explicitly when the object is deleted. Update the error with the result of // the synchronize operation. 
- if (AsyncInfoPtr == &LocalAsyncInfo && LocalAsyncInfo.Queue && !Err) - Err = Device.synchronize(&LocalAsyncInfo); + // Optionally, if the LIBOMPTARGET_FORCE_SYNCHRONIZE env variable is set, + // synchronize regardless of the local async info. + if ((ForceSynchronize || AsyncInfoPtr == &LocalAsyncInfo) && AsyncInfoPtr->Queue && !Err) + Err = Device.synchronize(AsyncInfoPtr); // Invalidate the wrapper object. AsyncInfoPtr = nullptr; @@ -926,6 +929,7 @@ AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo); auto Err = dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper); + AsyncInfoWrapper.finalize(Err); return Err; } @@ -935,6 +939,7 @@ AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo); auto Err = dataRetrieveImpl(HstPtr, TgtPtr, Size, AsyncInfoWrapper); + AsyncInfoWrapper.finalize(Err); return Err; } @@ -945,6 +950,7 @@ AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo); auto Err = dataExchangeImpl(SrcPtr, DstDev, DstPtr, Size, AsyncInfoWrapper); + AsyncInfoWrapper.finalize(Err); return Err; }