diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -494,8 +494,13 @@
 /// between asynchronous operations: kernel launches and memory transfers.
 struct AMDGPUSignalTy {
   /// Create an empty signal.
-  AMDGPUSignalTy() : Signal({0}), UseCount() {}
-  AMDGPUSignalTy(AMDGPUDeviceTy &Device) : Signal({0}), UseCount() {}
+  AMDGPUSignalTy() : Signal({0}), UseCount(), MicrosToWait(0) {}
+  AMDGPUSignalTy(AMDGPUDeviceTy &Device)
+      : Signal({0}), UseCount(), MicrosToWait(0) {}
+  /// Create an empty signal whose wait() spins actively for up to
+  /// \p MicrosToWait before it falls back to a blocking wait.
+  explicit AMDGPUSignalTy(uint64_t MicrosToWait)
+      : Signal({0}), UseCount(), MicrosToWait(MicrosToWait) {}

   /// Initialize the signal with an initial value.
   Error init(uint32_t InitialValue = 1) {
@@ -512,11 +517,11 @@

   /// Wait until the signal gets a zero value.
   Error wait() const {
-    // TODO: Is it better to use busy waiting or blocking the thread?
-    while (hsa_signal_wait_scacquire(Signal, HSA_SIGNAL_CONDITION_EQ, 0,
-                                     UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0)
-      ;
-    return Plugin::success();
+    // Spin actively first when a busy-wait budget was configured.
+    if (MicrosToWait)
+      return activeWaitImpl();
+
+    return waitImpl();
   }

   /// Load the value on the signal.
@@ -548,6 +553,36 @@
   /// Reference counter for tracking the concurrent use count. This is mainly
   /// used for knowing how many streams are using the signal.
   RefCountTy<> UseCount;
+
+  /// Timeout handed to the HSA_WAIT_STATE_ACTIVE wait before switching to a
+  /// blocking wait; zero disables the active phase. Intended as microseconds.
+  /// NOTE(review): HSA expresses timeout_hint in system timestamp units;
+  /// confirm those are microseconds on the supported platforms.
+  uint64_t MicrosToWait;
+
+  /// Block the calling thread until the signal reaches zero.
+  Error waitImpl() const {
+    // TODO: Is it better to use busy waiting or blocking the thread?
+    while (hsa_signal_wait_scacquire(Signal, HSA_SIGNAL_CONDITION_EQ, 0,
+                                     UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0)
+      ;
+    return Plugin::success();
+  }
+
+  /// Actively wait for the signal to reach zero for at most MicrosToWait, then
+  /// fall back to the blocking wait if it has not completed yet.
+  Error activeWaitImpl() const {
+    // Wait for the same completion condition as waitImpl() (value == 0) so
+    // the active phase is effective regardless of the initial signal value.
+    hsa_signal_value_t Got =
+        hsa_signal_wait_scacquire(Signal, HSA_SIGNAL_CONDITION_EQ, 0,
+                                  MicrosToWait, HSA_WAIT_STATE_ACTIVE);
+    if (Got == 0)
+      return Plugin::success();
+
+    // Timed out while spinning; switch to the blocked state.
+    return waitImpl();
+  }
 };

 /// Classes for holding AMDGPU signals and managing signals.
@@ -1931,6 +1966,7 @@
             Plugin::check(Status, "Error in hsa_amd_memory_lock: %s\n"))
       return Err;

-    AMDGPUSignalTy Signal;
+    /* Example use for microseconds to wait */
+    AMDGPUSignalTy Signal(300000);
     if (auto Err = Signal.init())
       return Err;