Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -540,6 +540,12 @@ /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; + + /// \returns True if waitcnt instruction is needed before barrier instruction, + /// false otherwise. + bool needWaitcntBeforeBarrier() const { + return true; + } }; } // End namespace llvm Index: llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -590,8 +590,9 @@ // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish, // but we also want to wait for any other outstanding transfers before // signalling other hardware blocks - if (I->getOpcode() == AMDGPU::S_BARRIER || - I->getOpcode() == AMDGPU::S_SENDMSG) + if ((I->getOpcode() == AMDGPU::S_BARRIER && + ST->needWaitcntBeforeBarrier()) || + I->getOpcode() == AMDGPU::S_SENDMSG) Required = LastIssued; else Required = handleOperands(*I);